hadoop/hadoop-huaweicloud/index.html

889 lines
40 KiB
HTML
Raw Normal View History

<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<!--
| Generated by Apache Maven Doxia at 2023-02-14
| Rendered using Apache Maven Stylus Skin 1.5
-->
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<title>Apache Hadoop OBS support &#x2013; OBSA: HuaweiCloud OBS Adapter for Hadoop Support</title>
<style type="text/css" media="all">
@import url("./css/maven-base.css");
@import url("./css/maven-theme.css");
@import url("./css/site.css");
</style>
<link rel="stylesheet" href="./css/print.css" type="text/css" media="print" />
<meta name="Date-Revision-yyyymmdd" content="20230214" />
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
</head>
<body class="composite">
<div id="banner">
<a href="http://hadoop.apache.org/" id="bannerLeft">
<img src="http://hadoop.apache.org/images/hadoop-logo.jpg" alt="Apache Hadoop" />
</a>
<a href="http://www.apache.org/" id="bannerRight">
<img src="http://www.apache.org/images/asf_logo_wide.png" alt="The Apache Software Foundation" />
</a>
<div class="clear">
<hr/>
</div>
</div>
<div id="breadcrumbs">
<div class="xright"> <a href="http://wiki.apache.org/hadoop" class="externalLink">Wiki</a>
|
<a href="https://gitbox.apache.org/repos/asf/hadoop.git" class="externalLink">git</a>
&nbsp;| Last Published: 2023-02-14
&nbsp;| Version: 3.4.0-SNAPSHOT
</div>
<div class="clear">
<hr/>
</div>
</div>
<div id="leftColumn">
<div id="navcolumn">
<h5>General</h5>
<ul>
<li class="none">
<a href="../index.html">Overview</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-common/SingleCluster.html">Single Node Setup</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-common/ClusterSetup.html">Cluster Setup</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-common/CommandsManual.html">Commands Reference</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-common/FileSystemShell.html">FileSystem Shell</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-common/Compatibility.html">Compatibility Specification</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-common/DownstreamDev.html">Downstream Developer's Guide</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-common/AdminCompatibilityGuide.html">Admin Compatibility Guide</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-common/InterfaceClassification.html">Interface Classification</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-common/filesystem/index.html">FileSystem Specification</a>
</li>
</ul>
<h5>Common</h5>
<ul>
<li class="none">
<a href="../hadoop-project-dist/hadoop-common/CLIMiniCluster.html">CLI Mini Cluster</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-common/FairCallQueue.html">Fair Call Queue</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-common/NativeLibraries.html">Native Libraries</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-common/Superusers.html">Proxy User</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-common/RackAwareness.html">Rack Awareness</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-common/SecureMode.html">Secure Mode</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-common/ServiceLevelAuth.html">Service Level Authorization</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-common/HttpAuthentication.html">HTTP Authentication</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-common/CredentialProviderAPI.html">Credential Provider API</a>
</li>
<li class="none">
<a href="../hadoop-kms/index.html">Hadoop KMS</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-common/Tracing.html">Tracing</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-common/UnixShellGuide.html">Unix Shell Guide</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-common/registry/index.html">Registry</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-common/AsyncProfilerServlet.html">Async Profiler</a>
</li>
</ul>
<h5>HDFS</h5>
<ul>
<li class="none">
<a href="../hadoop-project-dist/hadoop-hdfs/HdfsDesign.html">Architecture</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-hdfs/HdfsUserGuide.html">User Guide</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-hdfs/HDFSCommands.html">Commands Reference</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-hdfs/HDFSHighAvailabilityWithQJM.html">NameNode HA With QJM</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-hdfs/HDFSHighAvailabilityWithNFS.html">NameNode HA With NFS</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-hdfs/ObserverNameNode.html">Observer NameNode</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-hdfs/Federation.html">Federation</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-hdfs/ViewFs.html">ViewFs</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-hdfs/ViewFsOverloadScheme.html">ViewFsOverloadScheme</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-hdfs/HdfsSnapshots.html">Snapshots</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-hdfs/HdfsEditsViewer.html">Edits Viewer</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-hdfs/HdfsImageViewer.html">Image Viewer</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-hdfs/HdfsPermissionsGuide.html">Permissions and HDFS</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-hdfs/HdfsQuotaAdminGuide.html">Quotas and HDFS</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-hdfs/LibHdfs.html">libhdfs (C API)</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-hdfs/WebHDFS.html">WebHDFS (REST API)</a>
</li>
<li class="none">
<a href="../hadoop-hdfs-httpfs/index.html">HttpFS</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-hdfs/ShortCircuitLocalReads.html">Short Circuit Local Reads</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-hdfs/CentralizedCacheManagement.html">Centralized Cache Management</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-hdfs/HdfsNfsGateway.html">NFS Gateway</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-hdfs/HdfsRollingUpgrade.html">Rolling Upgrade</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-hdfs/ExtendedAttributes.html">Extended Attributes</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-hdfs/TransparentEncryption.html">Transparent Encryption</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-hdfs/HdfsMultihoming.html">Multihoming</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-hdfs/ArchivalStorage.html">Storage Policies</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-hdfs/MemoryStorage.html">Memory Storage Support</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-hdfs/SLGUserGuide.html">Synthetic Load Generator</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-hdfs/HDFSErasureCoding.html">Erasure Coding</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-hdfs/HDFSDiskbalancer.html">Disk Balancer</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-hdfs/HdfsUpgradeDomain.html">Upgrade Domain</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-hdfs/HdfsDataNodeAdminGuide.html">DataNode Admin</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-hdfs-rbf/HDFSRouterFederation.html">Router Federation</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-hdfs/HdfsProvidedStorage.html">Provided Storage</a>
</li>
</ul>
<h5>MapReduce</h5>
<ul>
<li class="none">
<a href="../hadoop-mapreduce-client/hadoop-mapreduce-client-core/MapReduceTutorial.html">Tutorial</a>
</li>
<li class="none">
<a href="../hadoop-mapreduce-client/hadoop-mapreduce-client-core/MapredCommands.html">Commands Reference</a>
</li>
<li class="none">
<a href="../hadoop-mapreduce-client/hadoop-mapreduce-client-core/MapReduce_Compatibility_Hadoop1_Hadoop2.html">Compatibility with 1.x</a>
</li>
<li class="none">
<a href="../hadoop-mapreduce-client/hadoop-mapreduce-client-core/EncryptedShuffle.html">Encrypted Shuffle</a>
</li>
<li class="none">
<a href="../hadoop-mapreduce-client/hadoop-mapreduce-client-core/PluggableShuffleAndPluggableSort.html">Pluggable Shuffle/Sort</a>
</li>
<li class="none">
<a href="../hadoop-mapreduce-client/hadoop-mapreduce-client-core/DistributedCacheDeploy.html">Distributed Cache Deploy</a>
</li>
<li class="none">
<a href="../hadoop-mapreduce-client/hadoop-mapreduce-client-core/SharedCacheSupport.html">Support for YARN Shared Cache</a>
</li>
</ul>
<h5>MapReduce REST APIs</h5>
<ul>
<li class="none">
<a href="../hadoop-mapreduce-client/hadoop-mapreduce-client-core/MapredAppMasterRest.html">MR Application Master</a>
</li>
<li class="none">
<a href="../hadoop-mapreduce-client/hadoop-mapreduce-client-hs/HistoryServerRest.html">MR History Server</a>
</li>
</ul>
<h5>YARN</h5>
<ul>
<li class="none">
<a href="../hadoop-yarn/hadoop-yarn-site/YARN.html">Architecture</a>
</li>
<li class="none">
<a href="../hadoop-yarn/hadoop-yarn-site/YarnCommands.html">Commands Reference</a>
</li>
<li class="none">
<a href="../hadoop-yarn/hadoop-yarn-site/CapacityScheduler.html">Capacity Scheduler</a>
</li>
<li class="none">
<a href="../hadoop-yarn/hadoop-yarn-site/FairScheduler.html">Fair Scheduler</a>
</li>
<li class="none">
<a href="../hadoop-yarn/hadoop-yarn-site/ResourceManagerRestart.html">ResourceManager Restart</a>
</li>
<li class="none">
<a href="../hadoop-yarn/hadoop-yarn-site/ResourceManagerHA.html">ResourceManager HA</a>
</li>
<li class="none">
<a href="../hadoop-yarn/hadoop-yarn-site/ResourceModel.html">Resource Model</a>
</li>
<li class="none">
<a href="../hadoop-yarn/hadoop-yarn-site/NodeLabel.html">Node Labels</a>
</li>
<li class="none">
<a href="../hadoop-yarn/hadoop-yarn-site/NodeAttributes.html">Node Attributes</a>
</li>
<li class="none">
<a href="../hadoop-yarn/hadoop-yarn-site/WebApplicationProxy.html">Web Application Proxy</a>
</li>
<li class="none">
<a href="../hadoop-yarn/hadoop-yarn-site/TimelineServer.html">Timeline Server</a>
</li>
<li class="none">
<a href="../hadoop-yarn/hadoop-yarn-site/TimelineServiceV2.html">Timeline Service V.2</a>
</li>
<li class="none">
<a href="../hadoop-yarn/hadoop-yarn-site/WritingYarnApplications.html">Writing YARN Applications</a>
</li>
<li class="none">
<a href="../hadoop-yarn/hadoop-yarn-site/YarnApplicationSecurity.html">YARN Application Security</a>
</li>
<li class="none">
<a href="../hadoop-yarn/hadoop-yarn-site/NodeManager.html">NodeManager</a>
</li>
<li class="none">
<a href="../hadoop-yarn/hadoop-yarn-site/DockerContainers.html">Running Applications in Docker Containers</a>
</li>
<li class="none">
<a href="../hadoop-yarn/hadoop-yarn-site/RuncContainers.html">Running Applications in runC Containers</a>
</li>
<li class="none">
<a href="../hadoop-yarn/hadoop-yarn-site/NodeManagerCgroups.html">Using CGroups</a>
</li>
<li class="none">
<a href="../hadoop-yarn/hadoop-yarn-site/SecureContainer.html">Secure Containers</a>
</li>
<li class="none">
<a href="../hadoop-yarn/hadoop-yarn-site/ReservationSystem.html">Reservation System</a>
</li>
<li class="none">
<a href="../hadoop-yarn/hadoop-yarn-site/GracefulDecommission.html">Graceful Decommission</a>
</li>
<li class="none">
<a href="../hadoop-yarn/hadoop-yarn-site/OpportunisticContainers.html">Opportunistic Containers</a>
</li>
<li class="none">
<a href="../hadoop-yarn/hadoop-yarn-site/Federation.html">YARN Federation</a>
</li>
<li class="none">
<a href="../hadoop-yarn/hadoop-yarn-site/SharedCache.html">Shared Cache</a>
</li>
<li class="none">
<a href="../hadoop-yarn/hadoop-yarn-site/UsingGpus.html">Using GPU</a>
</li>
<li class="none">
<a href="../hadoop-yarn/hadoop-yarn-site/UsingFPGA.html">Using FPGA</a>
</li>
<li class="none">
<a href="../hadoop-yarn/hadoop-yarn-site/PlacementConstraints.html">Placement Constraints</a>
</li>
<li class="none">
<a href="../hadoop-yarn/hadoop-yarn-site/YarnUI2.html">YARN UI2</a>
</li>
</ul>
<h5>YARN REST APIs</h5>
<ul>
<li class="none">
<a href="../hadoop-yarn/hadoop-yarn-site/WebServicesIntro.html">Introduction</a>
</li>
<li class="none">
<a href="../hadoop-yarn/hadoop-yarn-site/ResourceManagerRest.html">Resource Manager</a>
</li>
<li class="none">
<a href="../hadoop-yarn/hadoop-yarn-site/NodeManagerRest.html">Node Manager</a>
</li>
<li class="none">
<a href="../hadoop-yarn/hadoop-yarn-site/TimelineServer.html#Timeline_Server_REST_API_v1">Timeline Server</a>
</li>
<li class="none">
<a href="../hadoop-yarn/hadoop-yarn-site/TimelineServiceV2.html#Timeline_Service_v.2_REST_API">Timeline Service V.2</a>
</li>
</ul>
<h5>YARN Service</h5>
<ul>
<li class="none">
<a href="../hadoop-yarn/hadoop-yarn-site/yarn-service/Overview.html">Overview</a>
</li>
<li class="none">
<a href="../hadoop-yarn/hadoop-yarn-site/yarn-service/QuickStart.html">QuickStart</a>
</li>
<li class="none">
<a href="../hadoop-yarn/hadoop-yarn-site/yarn-service/Concepts.html">Concepts</a>
</li>
<li class="none">
<a href="../hadoop-yarn/hadoop-yarn-site/yarn-service/YarnServiceAPI.html">Yarn Service API</a>
</li>
<li class="none">
<a href="../hadoop-yarn/hadoop-yarn-site/yarn-service/ServiceDiscovery.html">Service Discovery</a>
</li>
<li class="none">
<a href="../hadoop-yarn/hadoop-yarn-site/yarn-service/SystemServices.html">System Services</a>
</li>
</ul>
<h5>Hadoop Compatible File Systems</h5>
<ul>
<li class="none">
<a href="../hadoop-aliyun/tools/hadoop-aliyun/index.html">Aliyun OSS</a>
</li>
<li class="none">
<a href="../hadoop-aws/tools/hadoop-aws/index.html">Amazon S3</a>
</li>
<li class="none">
<a href="../hadoop-azure/index.html">Azure Blob Storage</a>
</li>
<li class="none">
<a href="../hadoop-azure-datalake/index.html">Azure Data Lake Storage</a>
</li>
<li class="none">
<a href="../hadoop-cos/cloud-storage/index.html">Tencent COS</a>
</li>
<li class="none">
<a href="../hadoop-huaweicloud/cloud-storage/index.html">Huaweicloud OBS</a>
</li>
</ul>
<h5>Auth</h5>
<ul>
<li class="none">
<a href="../hadoop-auth/index.html">Overview</a>
</li>
<li class="none">
<a href="../hadoop-auth/Examples.html">Examples</a>
</li>
<li class="none">
<a href="../hadoop-auth/Configuration.html">Configuration</a>
</li>
<li class="none">
<a href="../hadoop-auth/BuildingIt.html">Building</a>
</li>
</ul>
<h5>Tools</h5>
<ul>
<li class="none">
<a href="../hadoop-streaming/HadoopStreaming.html">Hadoop Streaming</a>
</li>
<li class="none">
<a href="../hadoop-archives/HadoopArchives.html">Hadoop Archives</a>
</li>
<li class="none">
<a href="../hadoop-archive-logs/HadoopArchiveLogs.html">Hadoop Archive Logs</a>
</li>
<li class="none">
<a href="../hadoop-distcp/DistCp.html">DistCp</a>
</li>
<li class="none">
<a href="../hadoop-federation-balance/HDFSFederationBalance.html">HDFS Federation Balance</a>
</li>
<li class="none">
<a href="../hadoop-gridmix/GridMix.html">GridMix</a>
</li>
<li class="none">
<a href="../hadoop-rumen/Rumen.html">Rumen</a>
</li>
<li class="none">
<a href="../hadoop-resourceestimator/ResourceEstimator.html">Resource Estimator Service</a>
</li>
<li class="none">
<a href="../hadoop-sls/SchedulerLoadSimulator.html">Scheduler Load Simulator</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-common/Benchmarking.html">Hadoop Benchmarking</a>
</li>
<li class="none">
<a href="../hadoop-dynamometer/Dynamometer.html">Dynamometer</a>
</li>
</ul>
<h5>Reference</h5>
<ul>
<li class="none">
<a href="../hadoop-project-dist/hadoop-common/release/">Changelog and Release Notes</a>
</li>
<li class="none">
<a href="../api/index.html">Java API docs</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-common/UnixShellAPI.html">Unix Shell API</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-common/Metrics.html">Metrics</a>
</li>
</ul>
<h5>Configuration</h5>
<ul>
<li class="none">
<a href="../hadoop-project-dist/hadoop-common/core-default.xml">core-default.xml</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-hdfs/hdfs-default.xml">hdfs-default.xml</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-hdfs-rbf/hdfs-rbf-default.xml">hdfs-rbf-default.xml</a>
</li>
<li class="none">
<a href="../hadoop-mapreduce-client/hadoop-mapreduce-client-core/mapred-default.xml">mapred-default.xml</a>
</li>
<li class="none">
<a href="../hadoop-yarn/hadoop-yarn-common/yarn-default.xml">yarn-default.xml</a>
</li>
<li class="none">
<a href="../hadoop-kms/kms-default.html">kms-default.xml</a>
</li>
<li class="none">
<a href="../hadoop-hdfs-httpfs/httpfs-default.html">httpfs-default.xml</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-common/DeprecatedProperties.html">Deprecated Properties</a>
</li>
</ul>
<a href="http://maven.apache.org/" title="Built by Maven" class="poweredBy">
<img alt="Built by Maven" src="./images/logos/maven-feather.png"/>
</a>
</div>
</div>
<div id="bodyColumn">
<div id="contentBox">
<!---
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->
<h1>OBSA: HuaweiCloud OBS Adapter for Hadoop Support</h1>
<ul>
<li><a href="#Introduction">Introduction</a></li>
<li><a href="#Features">Features</a></li>
<li><a href="#Limitations">Limitations</a></li>
<li><a href="#Getting_Started">Getting Started</a>
<ul>
<li><a href="#Packages">Packages</a></li>
<li><a href="#Accessing_OBS_URLs">Accessing OBS URLs</a></li>
<li><a href="#Authenticating_with_OBS">Authenticating with OBS</a></li></ul></li>
<li><a href="#General_OBSA_Client_Configuration">General OBSA Client Configuration</a></li>
<li><a href="#Testing_the_hadoop-huaweicloud_Module">Testing the hadoop-huaweicloud Module</a></li></ul>
<section>
<h2><a name="Introduction"></a>Introduction</h2>
<p>The <code>hadoop-huaweicloud</code> module provides support for integration with the <a class="externalLink" href="https://www.huaweicloud.com/en-us/product/obs.html">HuaweiCloud Object Storage Service (OBS)</a>. This support comes via the JAR file <code>hadoop-huaweicloud.jar</code>.</p></section><section>
<h2><a name="Features"></a>Features</h2>
<ul>
<li>Read and write data stored in a HuaweiCloud OBS account.</li>
<li>Reference file system paths with URLs that use the <code>obs</code> scheme.</li>
<li>Present a hierarchical file system view by implementing the standard Hadoop <code>FileSystem</code> interface.</li>
<li>Support multipart upload for a large file.</li>
<li>Can act as a source of data in a MapReduce job, or a sink.</li>
<li>Uses HuaweiCloud OBS&#x2019;s Java SDK with support for the latest OBS features and authentication schemes.</li>
<li>Tested for scale.</li>
</ul></section><section>
<h2><a name="Limitations"></a>Limitations</h2>
<p>Partial or no support for the following operations:</p>
<ul>
<li>Symbolic link operations.</li>
<li>Proxy users.</li>
<li>File truncate.</li>
<li>File concat.</li>
<li>File checksum.</li>
<li>File replication factor.</li>
<li>Extended Attributes (XAttrs) operations.</li>
<li>Snapshot operations.</li>
<li>Storage policy.</li>
<li>Quota.</li>
<li>POSIX ACL.</li>
<li>Delegation token operations.</li>
</ul></section><section>
<h2><a name="Getting_Started"></a>Getting Started</h2><section>
<h3><a name="Packages"></a>Packages</h3>
<p>OBSA depends upon two JARs, alongside <code>hadoop-common</code> and its dependencies.</p>
<ul>
<li><code>hadoop-huaweicloud</code> JAR.</li>
<li><code>esdk-obs-java</code> JAR.</li>
</ul>
<p>The versions of <code>hadoop-common</code> and <code>hadoop-huaweicloud</code> must be identical.</p>
<p>To import the libraries into a Maven build, add <code>hadoop-huaweicloud</code> JAR to the build dependencies; it will pull in a compatible <code>esdk-obs-java</code> JAR.</p>
<p>The <code>hadoop-huaweicloud</code> JAR <i>does not</i> declare any dependencies other than the one unique to it, the OBS SDK JAR. This is to simplify excluding/tuning Hadoop dependency JARs in downstream applications. The <code>hadoop-client</code> or <code>hadoop-common</code> dependency must be declared.</p>
<div class="source">
<div class="source">
<pre>&lt;properties&gt;
&lt;!-- Your exact Hadoop version here--&gt;
&lt;hadoop.version&gt;3.4.0&lt;/hadoop.version&gt;
&lt;/properties&gt;
&lt;dependencies&gt;
&lt;dependency&gt;
&lt;groupId&gt;org.apache.hadoop&lt;/groupId&gt;
&lt;artifactId&gt;hadoop-client&lt;/artifactId&gt;
&lt;version&gt;${hadoop.version}&lt;/version&gt;
&lt;/dependency&gt;
&lt;dependency&gt;
&lt;groupId&gt;org.apache.hadoop&lt;/groupId&gt;
&lt;artifactId&gt;hadoop-huaweicloud&lt;/artifactId&gt;
&lt;version&gt;${hadoop.version}&lt;/version&gt;
&lt;/dependency&gt;
&lt;/dependencies&gt;
</pre></div></div>
</section><section>
<h3><a name="Accessing_OBS_URLs"></a>Accessing OBS URLs</h3>
<p>Before accessing a URL, the OBS implementation classes of Filesystem/AbstractFileSystem and the region endpoint where the bucket is located should be configured as follows:</p>
<div class="source">
<div class="source">
<pre>&lt;property&gt;
&lt;name&gt;fs.obs.impl&lt;/name&gt;
&lt;value&gt;org.apache.hadoop.fs.obs.OBSFileSystem&lt;/value&gt;
&lt;description&gt;The OBS implementation class of the Filesystem.&lt;/description&gt;
&lt;/property&gt;
&lt;property&gt;
&lt;name&gt;fs.AbstractFileSystem.obs.impl&lt;/name&gt;
&lt;value&gt;org.apache.hadoop.fs.obs.OBS&lt;/value&gt;
&lt;description&gt;The OBS implementation class of the AbstractFileSystem.&lt;/description&gt;
&lt;/property&gt;
&lt;property&gt;
&lt;name&gt;fs.obs.endpoint&lt;/name&gt;
&lt;value&gt;obs.region.myhuaweicloud.com&lt;/value&gt;
&lt;description&gt;OBS region endpoint where a bucket is located.&lt;/description&gt;
&lt;/property&gt;
</pre></div></div>
<p>OBS URLs can then be accessed as follows:</p>
<div class="source">
<div class="source">
<pre>obs://&lt;bucket_name&gt;/path
</pre></div></div>
<p>The scheme <code>obs</code> identifies a URL on a Hadoop-compatible file system <code>OBSFileSystem</code> backed by HuaweiCloud OBS. For example, the following <a href="../hadoop-project-dist/hadoop-common/FileSystemShell.html">FileSystem Shell</a> commands demonstrate access to a bucket named <code>mybucket</code>.</p>
<div class="source">
<div class="source">
<pre>hadoop fs -mkdir obs://mybucket/testDir
hadoop fs -put testFile obs://mybucket/testDir/testFile
hadoop fs -cat obs://mybucket/testDir/testFile
test file content
</pre></div></div>
<p>For details on how to create a bucket, see <a class="externalLink" href="https://support.huaweicloud.com/intl/en-us/qs-obs/obs_qs_0003.html"><b>Help Center &gt; Object Storage Service &gt; Getting Started &gt; Basic Operation Procedure</b></a>.</p></section><section>
<h3><a name="Authenticating_with_OBS"></a>Authenticating with OBS</h3>
<p>Except when interacting with public OBS buckets, the OBSA client needs credentials to interact with buckets. The client supports multiple authentication mechanisms. The simplest authentication mechanism is to provide an OBS access key and secret key as follows.</p>
<div class="source">
<div class="source">
<pre>&lt;property&gt;
&lt;name&gt;fs.obs.access.key&lt;/name&gt;
&lt;description&gt;OBS access key.
Omit for provider-based authentication.&lt;/description&gt;
&lt;/property&gt;
&lt;property&gt;
&lt;name&gt;fs.obs.secret.key&lt;/name&gt;
&lt;description&gt;OBS secret key.
Omit for provider-based authentication.&lt;/description&gt;
&lt;/property&gt;
</pre></div></div>
<p><b>Do not share access key, secret key, and session token. They must be kept secret.</b></p>
<p>Custom implementations of <code>com.obs.services.IObsCredentialsProvider</code> (see <a class="externalLink" href="https://support.huaweicloud.com/intl/en-us/sdk-java-devg-obs/en-us_topic_0142815570.html"><b>Creating an Instance of ObsClient</b></a>) or <code>org.apache.hadoop.fs.obs.BasicSessionCredential</code> may also be used for authentication.</p>
<div class="source">
<div class="source">
<pre>&lt;property&gt;
&lt;name&gt;fs.obs.security.provider&lt;/name&gt;
&lt;description&gt;
Class name of security provider class which implements
com.obs.services.IObsCredentialsProvider, which will
be used to construct an OBS client instance as an input parameter.
&lt;/description&gt;
&lt;/property&gt;
&lt;property&gt;
&lt;name&gt;fs.obs.credentials.provider&lt;/name&gt;
&lt;description&gt;
Class name of credential provider class which implements
org.apache.hadoop.fs.obs.BasicSessionCredential,
which must override three APIs: getOBSAccessKeyId(),
getOBSSecretKey(), and getSessionToken().
&lt;/description&gt;
&lt;/property&gt;
</pre></div></div>
</section></section><section>
<h2><a name="General_OBSA_Client_Configuration"></a>General OBSA Client Configuration</h2>
<p>All OBSA client options are configured with properties that have the prefix <code>fs.obs.</code>.</p>
<div class="source">
<div class="source">
<pre>&lt;property&gt;
&lt;name&gt;fs.obs.connection.ssl.enabled&lt;/name&gt;
&lt;value&gt;false&lt;/value&gt;
&lt;description&gt;Enable or disable SSL connections to OBS.&lt;/description&gt;
&lt;/property&gt;
&lt;property&gt;
&lt;name&gt;fs.obs.connection.maximum&lt;/name&gt;
&lt;value&gt;1000&lt;/value&gt;
&lt;description&gt;Maximum number of simultaneous connections to OBS.&lt;/description&gt;
&lt;/property&gt;
&lt;property&gt;
&lt;name&gt;fs.obs.connection.establish.timeout&lt;/name&gt;
&lt;value&gt;120000&lt;/value&gt;
&lt;description&gt;Socket connection setup timeout in milliseconds.&lt;/description&gt;
&lt;/property&gt;
&lt;property&gt;
&lt;name&gt;fs.obs.connection.timeout&lt;/name&gt;
&lt;value&gt;120000&lt;/value&gt;
&lt;description&gt;Socket connection timeout in milliseconds.&lt;/description&gt;
&lt;/property&gt;
&lt;property&gt;
&lt;name&gt;fs.obs.idle.connection.time&lt;/name&gt;
&lt;value&gt;30000&lt;/value&gt;
&lt;description&gt;Socket idle connection time.&lt;/description&gt;
&lt;/property&gt;
&lt;property&gt;
&lt;name&gt;fs.obs.max.idle.connections&lt;/name&gt;
&lt;value&gt;1000&lt;/value&gt;
&lt;description&gt;Maximum number of socket idle connections.&lt;/description&gt;
&lt;/property&gt;
&lt;property&gt;
&lt;name&gt;fs.obs.socket.send.buffer&lt;/name&gt;
&lt;value&gt;262144&lt;/value&gt;
&lt;description&gt;Socket send buffer to be used in OBS SDK. Represented in bytes.&lt;/description&gt;
&lt;/property&gt;
&lt;property&gt;
&lt;name&gt;fs.obs.socket.recv.buffer&lt;/name&gt;
&lt;value&gt;262144&lt;/value&gt;
&lt;description&gt;Socket receive buffer to be used in OBS SDK. Represented in bytes.&lt;/description&gt;
&lt;/property&gt;
&lt;property&gt;
&lt;name&gt;fs.obs.threads.keepalivetime&lt;/name&gt;
&lt;value&gt;60&lt;/value&gt;
&lt;description&gt;Number of seconds a thread can be idle before being
terminated in thread pool.&lt;/description&gt;
&lt;/property&gt;
&lt;property&gt;
&lt;name&gt;fs.obs.threads.max&lt;/name&gt;
&lt;value&gt;20&lt;/value&gt;
&lt;description&gt; Maximum number of concurrent active (part)uploads,
which each use a thread from thread pool.&lt;/description&gt;
&lt;/property&gt;
&lt;property&gt;
&lt;name&gt;fs.obs.max.total.tasks&lt;/name&gt;
&lt;value&gt;20&lt;/value&gt;
&lt;description&gt;Number of (part)uploads allowed to the queue before
blocking additional uploads.&lt;/description&gt;
&lt;/property&gt;
&lt;property&gt;
&lt;name&gt;fs.obs.delete.threads.max&lt;/name&gt;
&lt;value&gt;20&lt;/value&gt;
&lt;description&gt;Max number of delete threads.&lt;/description&gt;
&lt;/property&gt;
&lt;property&gt;
&lt;name&gt;fs.obs.multipart.size&lt;/name&gt;
&lt;value&gt;104857600&lt;/value&gt;
&lt;description&gt;Part size for multipart upload.
&lt;/description&gt;
&lt;/property&gt;
&lt;property&gt;
&lt;name&gt;fs.obs.multiobjectdelete.maximum&lt;/name&gt;
&lt;value&gt;1000&lt;/value&gt;
&lt;description&gt;Max number of objects in one multi-object delete call.
&lt;/description&gt;
&lt;/property&gt;
&lt;property&gt;
&lt;name&gt;fs.obs.fast.upload.buffer&lt;/name&gt;
&lt;value&gt;disk&lt;/value&gt;
&lt;description&gt;Which buffer to use. Default is `disk`, value may be
`disk` | `array` | `bytebuffer`.
&lt;/description&gt;
&lt;/property&gt;
&lt;property&gt;
&lt;name&gt;fs.obs.buffer.dir&lt;/name&gt;
&lt;value&gt;dir1,dir2,dir3&lt;/value&gt;
&lt;description&gt;Comma separated list of directories that will be used to buffer file
uploads to. This option takes effect only when the option 'fs.obs.fast.upload.buffer'
is set to 'disk'.
&lt;/description&gt;
&lt;/property&gt;
&lt;property&gt;
&lt;name&gt;fs.obs.fast.upload.active.blocks&lt;/name&gt;
&lt;value&gt;4&lt;/value&gt;
&lt;description&gt;Maximum number of blocks a single output stream can have active
(uploading, or queued to the central FileSystem instance's pool of queued
operations).
&lt;/description&gt;
&lt;/property&gt;
&lt;property&gt;
&lt;name&gt;fs.obs.readahead.range&lt;/name&gt;
&lt;value&gt;1048576&lt;/value&gt;
&lt;description&gt;Bytes to read ahead during a seek() before closing and
re-opening the OBS HTTP connection. &lt;/description&gt;
&lt;/property&gt;
&lt;property&gt;
&lt;name&gt;fs.obs.read.transform.enable&lt;/name&gt;
&lt;value&gt;true&lt;/value&gt;
&lt;description&gt;Flag indicating if socket connections can be reused by
position read. Set `false` only for HBase.&lt;/description&gt;
&lt;/property&gt;
&lt;property&gt;
&lt;name&gt;fs.obs.list.threads.core&lt;/name&gt;
&lt;value&gt;30&lt;/value&gt;
&lt;description&gt;Number of core list threads.&lt;/description&gt;
&lt;/property&gt;
&lt;property&gt;
&lt;name&gt;fs.obs.list.threads.max&lt;/name&gt;
&lt;value&gt;60&lt;/value&gt;
&lt;description&gt;Maximum number of list threads.&lt;/description&gt;
&lt;/property&gt;
&lt;property&gt;
&lt;name&gt;fs.obs.list.workqueue.capacity&lt;/name&gt;
&lt;value&gt;1024&lt;/value&gt;
&lt;description&gt;Capacity of list work queue.&lt;/description&gt;
&lt;/property&gt;
&lt;property&gt;
&lt;name&gt;fs.obs.list.parallel.factor&lt;/name&gt;
&lt;value&gt;30&lt;/value&gt;
&lt;description&gt;List parallel factor.&lt;/description&gt;
&lt;/property&gt;
&lt;property&gt;
&lt;name&gt;fs.obs.trash.enable&lt;/name&gt;
&lt;value&gt;false&lt;/value&gt;
&lt;description&gt;Switch for the fast delete.&lt;/description&gt;
&lt;/property&gt;
&lt;property&gt;
&lt;name&gt;fs.obs.trash.dir&lt;/name&gt;
&lt;description&gt;The fast delete recycle directory.&lt;/description&gt;
&lt;/property&gt;
&lt;property&gt;
&lt;name&gt;fs.obs.block.size&lt;/name&gt;
&lt;value&gt;134217728&lt;/value&gt;
&lt;description&gt;Default block size for OBS FileSystem.
&lt;/description&gt;
&lt;/property&gt;
</pre></div></div>
</section><section>
<h2><a name="Testing_the_hadoop-huaweicloud_Module"></a>Testing the hadoop-huaweicloud Module</h2>
<p>The <code>hadoop-huaweicloud</code> module includes a full suite of unit tests. Most of the tests will run against the HuaweiCloud OBS. To run these tests, please create <code>src/test/resources/auth-keys.xml</code> with OBS account information mentioned in the above sections and the following properties.</p>
<div class="source">
<div class="source">
<pre>&lt;property&gt;
&lt;name&gt;fs.contract.test.fs.obs&lt;/name&gt;
&lt;value&gt;obs://obsfilesystem-bucket&lt;/value&gt;
&lt;/property&gt;
</pre></div></div></section>
</div>
</div>
<div class="clear">
<hr/>
</div>
<div id="footer">
<div class="xright">
&#169; 2008-2023
Apache Software Foundation
- <a href="http://maven.apache.org/privacy-policy.html">Privacy Policy</a>.
Apache Maven, Maven, Apache, the Apache feather logo, and the Apache Maven project logos are trademarks of The Apache Software Foundation.
</div>
<div class="clear">
<hr/>
</div>
</div>
</body>
</html>