Merge pull request #6 from apache/trunk

Merge
This commit is contained in:
Yizhong Zhang 2022-03-15 11:21:09 +08:00 committed by GitHub
commit 2a425d0cb1
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3602 changed files with 299009 additions and 113879 deletions

View File

@ -1,6 +1,19 @@
## NOTICE
<!--
Thanks for sending a pull request!
1. If this is your first time, please read our contributor guidelines: https://cwiki.apache.org/confluence/display/HADOOP/How+To+Contribute
2. Make sure your PR title starts with JIRA issue id, e.g., 'HADOOP-17799. Your PR title ...'.
-->
### Description of PR
### How was this patch tested?
### For code changes:
- [ ] Does the title of this PR start with the corresponding JIRA issue id (e.g. 'HADOOP-17799. Your PR title ...')?
- [ ] Object storage: have the integration tests been executed and the endpoint declared according to the connector-specific documentation?
- [ ] If adding new dependencies to the code, are these dependencies licensed in a way that is compatible for inclusion under [ASF 2.0](http://www.apache.org/legal/resolved.html#category-a)?
- [ ] If applicable, have you updated the `LICENSE`, `LICENSE-binary`, `NOTICE-binary` files?
Please create an issue in ASF JIRA before opening a pull request,
and you need to set the title of the pull request which starts with
the corresponding JIRA issue number. (e.g. HADOOP-XXXXX. Fix a typo in YYY.)
For more details, please see https://cwiki.apache.org/confluence/display/HADOOP/How+To+Contribute

View File

@ -8,10 +8,10 @@ Requirements:
* Maven 3.3 or later
* Boost 1.72 (if compiling native code)
* Protocol Buffers 3.7.1 (if compiling native code)
* CMake 3.1 or newer (if compiling native code)
* CMake 3.19 or newer (if compiling native code)
* Zlib devel (if compiling native code)
* Cyrus SASL devel (if compiling native code)
* One of the compilers that support thread_local storage: GCC 4.8.1 or later, Visual Studio,
* One of the compilers that support thread_local storage: GCC 9.3.0 or later, Visual Studio,
Clang (community version), Clang (version for iOS 9 and later) (if compiling native code)
* openssl devel (if compiling native hadoop-pipes and to get the best HDFS encryption performance)
* Linux FUSE (Filesystem in Userspace) version 2.6 or above (if compiling fuse_dfs)
@ -51,39 +51,47 @@ Known issues:
and run your IDE and Docker etc inside that VM.
----------------------------------------------------------------------------------
Installing required packages for clean install of Ubuntu 14.04 LTS Desktop:
Installing required packages for clean install of Ubuntu 18.04 LTS Desktop.
(For Ubuntu 20.04, gcc/g++ and cmake bundled with Ubuntu can be used.
Refer to dev-support/docker/Dockerfile):
* Oracle JDK 1.8 (preferred)
$ sudo apt-get purge openjdk*
$ sudo apt-get install software-properties-common
$ sudo add-apt-repository ppa:webupd8team/java
* Open JDK 1.8
$ sudo apt-get update
$ sudo apt-get install oracle-java8-installer
$ sudo apt-get -y install openjdk-8-jdk
* Maven
$ sudo apt-get -y install maven
* Native libraries
$ sudo apt-get -y install build-essential autoconf automake libtool cmake zlib1g-dev pkg-config libssl-dev libsasl2-dev
* GCC 9.3.0
$ sudo apt-get -y install software-properties-common
$ sudo add-apt-repository -y ppa:ubuntu-toolchain-r/test
$ sudo apt-get update
$ sudo apt-get -y install g++-9 gcc-9
$ sudo update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-9 60 --slave /usr/bin/g++ g++ /usr/bin/g++-9
* CMake 3.19
$ curl -L https://cmake.org/files/v3.19/cmake-3.19.0.tar.gz > cmake-3.19.0.tar.gz
$ tar -zxvf cmake-3.19.0.tar.gz && cd cmake-3.19.0
$ ./bootstrap
$ make -j$(nproc)
$ sudo make install
* Protocol Buffers 3.7.1 (required to build native code)
$ mkdir -p /opt/protobuf-3.7-src \
&& curl -L -s -S \
https://github.com/protocolbuffers/protobuf/releases/download/v3.7.1/protobuf-java-3.7.1.tar.gz \
-o /opt/protobuf-3.7.1.tar.gz \
&& tar xzf /opt/protobuf-3.7.1.tar.gz --strip-components 1 -C /opt/protobuf-3.7-src \
&& cd /opt/protobuf-3.7-src \
&& ./configure\
&& make install \
&& rm -rf /opt/protobuf-3.7-src
$ curl -L -s -S https://github.com/protocolbuffers/protobuf/releases/download/v3.7.1/protobuf-java-3.7.1.tar.gz -o protobuf-3.7.1.tar.gz
$ mkdir protobuf-3.7-src
$ tar xzf protobuf-3.7.1.tar.gz --strip-components 1 -C protobuf-3.7-src && cd protobuf-3.7-src
$ ./configure
$ make -j$(nproc)
$ sudo make install
* Boost
$ curl -L https://sourceforge.net/projects/boost/files/boost/1.72.0/boost_1_72_0.tar.bz2/download > boost_1_72_0.tar.bz2 \
&& tar --bzip2 -xf boost_1_72_0.tar.bz2 \
&& cd boost_1_72_0 \
&& ./bootstrap.sh --prefix=/usr/ \
&& ./b2 --without-python install
$ curl -L https://sourceforge.net/projects/boost/files/boost/1.72.0/boost_1_72_0.tar.bz2/download > boost_1_72_0.tar.bz2
$ tar --bzip2 -xf boost_1_72_0.tar.bz2 && cd boost_1_72_0
$ ./bootstrap.sh --prefix=/usr/
$ ./b2 --without-python
$ sudo ./b2 --without-python install
Optional packages:
* Snappy compression (only used for hadoop-mapreduce-client-nativetask)
$ sudo apt-get install snappy libsnappy-dev
$ sudo apt-get install libsnappy-dev
* Intel ISA-L library for erasure coding
Please refer to https://01.org/intel%C2%AE-storage-acceleration-library-open-source-version
(OR https://github.com/01org/isa-l)
@ -103,7 +111,7 @@ Maven main modules:
- hadoop-project (Parent POM for all Hadoop Maven modules. )
(All plugins & dependencies versions are defined here.)
- hadoop-project-dist (Parent POM for modules that generate distributions.)
- hadoop-annotations (Generates the Hadoop doclet used to generated the Javadocs)
- hadoop-annotations (Generates the Hadoop doclet used to generate the Javadocs)
- hadoop-assemblies (Maven assemblies used by the different modules)
- hadoop-maven-plugins (Maven plugins used in project)
- hadoop-build-tools (Build tools like checkstyle, etc.)
@ -120,7 +128,7 @@ Maven main modules:
----------------------------------------------------------------------------------
Where to run Maven from?
It can be run from any module. The only catch is that if not run from utrunk
It can be run from any module. The only catch is that if not run from trunk
all modules that are not part of the build run must be installed in the local
Maven cache or available in a Maven repository.
@ -131,11 +139,11 @@ Maven build goals:
* Compile : mvn compile [-Pnative]
* Run tests : mvn test [-Pnative] [-Pshelltest]
* Create JAR : mvn package
* Run findbugs : mvn compile findbugs:findbugs
* Run spotbugs : mvn compile spotbugs:spotbugs
* Run checkstyle : mvn compile checkstyle:checkstyle
* Install JAR in M2 cache : mvn install
* Deploy JAR to Maven repo : mvn deploy
* Run clover : mvn test -Pclover [-DcloverLicenseLocation=${user.name}/.clover.license]
* Run clover : mvn test -Pclover
* Run Rat : mvn apache-rat:check
* Build javadocs : mvn javadoc:javadoc
* Build distribution : mvn package [-Pdist][-Pdocs][-Psrc][-Pnative][-Dtar][-Preleasedocs][-Pyarn-ui]
@ -176,7 +184,6 @@ Maven build goals:
we silently build a version of libhadoop.so that cannot make use of snappy.
This option is recommended if you plan on making use of snappy and want
to get more repeatable builds.
* Use -Dsnappy.prefix to specify a nonstandard location for the libsnappy
header files and library files. You do not need this option if you have
installed snappy using a package manager.
@ -319,40 +326,35 @@ to update SNAPSHOTs from external repos.
----------------------------------------------------------------------------------
Importing projects to eclipse
When you import the project to eclipse, install hadoop-maven-plugins at first.
First, install the artifacts, including hadoop-maven-plugins, from the top of the source tree.
$ cd hadoop-maven-plugins
$ mvn install
$ mvn clean install -DskipTests -DskipShade
Then, generate eclipse project files.
$ mvn eclipse:eclipse -DskipTests
At last, import to eclipse by specifying the root directory of the project via
[File] > [Import] > [Existing Projects into Workspace].
Then, import to eclipse by specifying the root directory of the project via
[File] > [Import] > [Maven] > [Existing Maven Projects].
----------------------------------------------------------------------------------
Building distributions:
Create binary distribution without native code and without documentation:
Create binary distribution without native code and without Javadocs:
$ mvn package -Pdist -DskipTests -Dtar -Dmaven.javadoc.skip=true
Create binary distribution with native code and with documentation:
Create binary distribution with native code:
$ mvn package -Pdist,native,docs -DskipTests -Dtar
$ mvn package -Pdist,native -DskipTests -Dtar
Create source distribution:
$ mvn package -Psrc -DskipTests
Create source and binary distributions with native code and documentation:
Create source and binary distributions with native code:
$ mvn package -Pdist,native,docs,src -DskipTests -Dtar
$ mvn package -Pdist,native,src -DskipTests -Dtar
Create a local staging version of the website (in /tmp/hadoop-site)
$ mvn clean site -Preleasedocs; mvn site:stage -DstagingDirectory=/tmp/hadoop-site
$ mvn site site:stage -Preleasedocs,docs -DstagingDirectory=/tmp/hadoop-site
Note that the site needs to be built in a second pass after other artifacts.
@ -453,6 +455,17 @@ Building on CentOS 8
* Install libraries provided by CentOS 8.
$ sudo dnf install libtirpc-devel zlib-devel lz4-devel bzip2-devel openssl-devel cyrus-sasl-devel libpmem-devel
* Install GCC 9.3.0
$ sudo dnf -y install gcc-toolset-9-gcc gcc-toolset-9-gcc-c++
$ source /opt/rh/gcc-toolset-9/enable
* Install CMake 3.19
$ curl -L https://cmake.org/files/v3.19/cmake-3.19.0.tar.gz > cmake-3.19.0.tar.gz
$ tar -zxvf cmake-3.19.0.tar.gz && cd cmake-3.19.0
$ ./bootstrap
$ make -j$(nproc)
$ sudo make install
* Install boost.
$ curl -L -o boost_1_72_0.tar.bz2 https://sourceforge.net/projects/boost/files/boost/1.72.0/boost_1_72_0.tar.bz2/download
$ tar xjf boost_1_72_0.tar.bz2
@ -489,7 +502,7 @@ Requirements:
* Maven 3.0 or later
* Boost 1.72
* Protocol Buffers 3.7.1
* CMake 3.1 or newer
* CMake 3.19 or newer
* Visual Studio 2010 Professional or Higher
* Windows SDK 8.1 (if building CPU rate control for the container executor)
* zlib headers (if building native code bindings for zlib)

220
Jenkinsfile vendored
View File

@ -1,220 +0,0 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
pipeline {
agent {
label 'Hadoop'
}
options {
buildDiscarder(logRotator(numToKeepStr: '5'))
timeout (time: 20, unit: 'HOURS')
timestamps()
checkoutToSubdirectory('src')
}
environment {
SOURCEDIR = 'src'
// will also need to change notification section below
PATCHDIR = 'out'
DOCKERFILE = "${SOURCEDIR}/dev-support/docker/Dockerfile"
YETUS='yetus'
// Branch or tag name. Yetus release tags are 'rel/X.Y.Z'
YETUS_VERSION='6ab19e71eaf3234863424c6f684b34c1d3dcc0ce'
}
parameters {
string(name: 'JIRA_ISSUE_KEY',
defaultValue: '',
description: 'The JIRA issue that has a patch needing pre-commit testing. Example: HADOOP-1234')
}
stages {
// Checks out Apache Yetus into ${WORKSPACE}/${YETUS} at the revision pinned by
// the YETUS_VERSION environment variable.
stage ('install yetus') {
steps {
dir("${WORKSPACE}/${YETUS}") {
// Explicit GitSCM checkout so that a branch, tag or commit id can be given as the revision.
checkout([
$class: 'GitSCM',
branches: [[name: "${env.YETUS_VERSION}"]],
userRemoteConfigs: [[ url: 'https://github.com/apache/yetus.git']]]
)
}
}
}
// Runs the Yetus precommit checks by invoking test-patch.sh from the Yetus checkout.
// The shell script recreates the patch directory, exits early (writing a stub report.html)
// when neither JIRA_ISSUE_KEY nor CHANGE_URL is set, and otherwise builds up the
// YETUS_ARGS array that is finally passed to test-patch.sh.
stage ('precommit-run') {
steps {
// Expose the GitHub and JIRA credentials to the script as
// GITHUB_USER/GITHUB_TOKEN and JIRA_USER/JIRA_PASSWORD.
withCredentials(
[usernamePassword(credentialsId: 'apache-hadoop-at-github.com',
passwordVariable: 'GITHUB_TOKEN',
usernameVariable: 'GITHUB_USER'),
usernamePassword(credentialsId: 'hadoopqa-at-asf-jira',
passwordVariable: 'JIRA_PASSWORD',
usernameVariable: 'JIRA_USER')]) {
// Single-quoted Groovy string: every ${...} below is expanded by bash, not by Groovy.
sh '''#!/usr/bin/env bash
set -e
TESTPATCHBIN="${WORKSPACE}/${YETUS}/precommit/src/main/shell/test-patch.sh"
# this must be clean for every run
if [[ -d "${WORKSPACE}/${PATCHDIR}" ]]; then
rm -rf "${WORKSPACE}/${PATCHDIR}"
fi
mkdir -p "${WORKSPACE}/${PATCHDIR}"
# if given a JIRA issue, process it. If CHANGE_URL is set
# (e.g., Github Branch Source plugin), process it.
# otherwise exit, because we don't want Hadoop to do a
# full build. We wouldn't normally do this check for smaller
# projects. :)
if [[ -n "${JIRA_ISSUE_KEY}" ]]; then
YETUS_ARGS+=("${JIRA_ISSUE_KEY}")
elif [[ -z "${CHANGE_URL}" ]]; then
echo "Full build skipped" > "${WORKSPACE}/${PATCHDIR}/report.html"
exit 0
fi
YETUS_ARGS+=("--patch-dir=${WORKSPACE}/${PATCHDIR}")
# where the source is located
YETUS_ARGS+=("--basedir=${WORKSPACE}/${SOURCEDIR}")
# our project defaults come from a personality file
YETUS_ARGS+=("--project=hadoop")
YETUS_ARGS+=("--personality=${WORKSPACE}/${SOURCEDIR}/dev-support/bin/hadoop.sh")
# lots of different output formats
YETUS_ARGS+=("--brief-report-file=${WORKSPACE}/${PATCHDIR}/brief.txt")
YETUS_ARGS+=("--console-report-file=${WORKSPACE}/${PATCHDIR}/console.txt")
YETUS_ARGS+=("--html-report-file=${WORKSPACE}/${PATCHDIR}/report.html")
# enable writing back to Github
YETUS_ARGS+=(--github-token="${GITHUB_TOKEN}")
# enable writing back to ASF JIRA
YETUS_ARGS+=(--jira-password="${JIRA_PASSWORD}")
YETUS_ARGS+=(--jira-user="${JIRA_USER}")
# auto-kill any surefire stragglers during unit test runs
YETUS_ARGS+=("--reapermode=kill")
# set relatively high limits for ASF machines
# changing these to higher values may cause problems
# with other jobs on systemd-enabled machines
YETUS_ARGS+=("--proclimit=5500")
YETUS_ARGS+=("--dockermemlimit=20g")
# -1 findbugs issues that show up prior to the patch being applied
YETUS_ARGS+=("--findbugs-strict-precheck")
# rsync these files back into the archive dir
YETUS_ARGS+=("--archive-list=checkstyle-errors.xml,findbugsXml.xml")
# URL for user-side presentation in reports and such to our artifacts
# (needs to match the archive bits below)
YETUS_ARGS+=("--build-url-artifacts=artifact/out")
# plugins to enable
YETUS_ARGS+=("--plugins=all")
# use Hadoop's bundled shelldocs
YETUS_ARGS+=("--shelldocs=${WORKSPACE}/${SOURCEDIR}/dev-support/bin/shelldocs")
# don't let these tests cause -1s because we aren't really paying that
# much attention to them
YETUS_ARGS+=("--tests-filter=checkstyle")
# run in docker mode and specifically point to our
# Dockerfile since we don't want to use the auto-pulled version.
YETUS_ARGS+=("--docker")
YETUS_ARGS+=("--dockerfile=${DOCKERFILE}")
YETUS_ARGS+=("--mvn-custom-repos")
# effectively treat dev-suport as a custom maven module
YETUS_ARGS+=("--skip-dirs=dev-support")
# help keep the ASF boxes clean
YETUS_ARGS+=("--sentinel")
# use emoji vote so it is easier to find the broken line
YETUS_ARGS+=("--github-use-emoji-vote")
# test with Java 8 and 11
YETUS_ARGS+=("--java-home=/usr/lib/jvm/java-8-openjdk-amd64")
YETUS_ARGS+=("--multijdkdirs=/usr/lib/jvm/java-11-openjdk-amd64")
YETUS_ARGS+=("--multijdktests=compile")
# custom javadoc goals
YETUS_ARGS+=("--mvn-javadoc-goals=process-sources,javadoc:javadoc-no-fork")
"${TESTPATCHBIN}" "${YETUS_ARGS[@]}"
'''
}
}
}
}
// Post-build actions: archive and publish the Yetus output and JUnit results on every
// build, then stop leftover processes/containers and delete the workspace.
post {
always {
script {
// Yetus output
archiveArtifacts "${env.PATCHDIR}/**"
// Publish the HTML report so that it can be looked at
// Has to be relative to WORKSPACE.
publishHTML (target: [
allowMissing: true,
keepAll: true,
alwaysLinkToLastBuild: true,
// Has to be relative to WORKSPACE
reportDir: "${env.PATCHDIR}",
reportFiles: 'report.html',
reportName: 'Yetus Report'
])
// Publish JUnit results
// A failure of the junit step is only logged, so it does not abort the remaining post actions.
try {
junit "${env.SOURCEDIR}/**/target/surefire-reports/*.xml"
} catch(e) {
echo 'junit processing: ' + e.toString()
}
}
}
// Jenkins pipeline jobs fill slaves on PRs without this :(
cleanup() {
script {
// Kill whatever pidfile.txt / cidfile.txt in the patch directory point at ("|| true"
// keeps a failed kill from failing the step), make the workspace owner-accessible,
// then remove it with deleteDir().
sh '''
# See YETUS-764
if [ -f "${WORKSPACE}/${PATCHDIR}/pidfile.txt" ]; then
echo "test-patch process appears to still be running: killing"
kill `cat "${WORKSPACE}/${PATCHDIR}/pidfile.txt"` || true
sleep 10
fi
if [ -f "${WORKSPACE}/${PATCHDIR}/cidfile.txt" ]; then
echo "test-patch container appears to still be running: killing"
docker kill `cat "${WORKSPACE}/${PATCHDIR}/cidfile.txt"` || true
fi
# See HADOOP-13951
chmod -R u+rxw "${WORKSPACE}"
'''
deleteDir()
}
}
}
}

View File

@ -214,18 +214,18 @@ com.aliyun:aliyun-java-sdk-core:3.4.0
com.aliyun:aliyun-java-sdk-ecs:4.2.0
com.aliyun:aliyun-java-sdk-ram:3.0.0
com.aliyun:aliyun-java-sdk-sts:3.0.0
com.aliyun.oss:aliyun-sdk-oss:3.4.1
com.aliyun.oss:aliyun-sdk-oss:3.13.2
com.amazonaws:aws-java-sdk-bundle:1.11.901
com.cedarsoftware:java-util:1.9.0
com.cedarsoftware:json-io:2.5.1
com.fasterxml.jackson.core:jackson-annotations:2.9.9
com.fasterxml.jackson.core:jackson-core:2.9.9
com.fasterxml.jackson.core:jackson-databind:2.9.9.2
com.fasterxml.jackson.jaxrs:jackson-jaxrs-base:2.9.9
com.fasterxml.jackson.jaxrs:jackson-jaxrs-json-provider:2.9.9
com.fasterxml.jackson.module:jackson-module-jaxb-annotations:2.9.9
com.fasterxml.jackson.core:jackson-annotations:2.13.0
com.fasterxml.jackson.core:jackson-core:2.13.0
com.fasterxml.jackson.core:jackson-databind:2.13.0
com.fasterxml.jackson.jaxrs:jackson-jaxrs-base:2.13.0
com.fasterxml.jackson.jaxrs:jackson-jaxrs-json-provider:2.13.0
com.fasterxml.jackson.module:jackson-module-jaxb-annotations:2.13.0
com.fasterxml.uuid:java-uuid-generator:3.1.4
com.fasterxml.woodstox:woodstox-core:5.0.3
com.fasterxml.woodstox:woodstox-core:5.3.0
com.github.davidmoten:rxjava-extras:0.8.0.17
com.github.stephenc.jcip:jcip-annotations:1.0-1
com.google:guice:4.0
@ -240,17 +240,16 @@ com.google.guava:guava:20.0
com.google.guava:guava:27.0-jre
com.google.guava:listenablefuture:9999.0-empty-to-avoid-conflict-with-guava
com.microsoft.azure:azure-storage:7.0.0
com.nimbusds:nimbus-jose-jwt:4.41.1
com.nimbusds:nimbus-jose-jwt:9.8.1
com.squareup.okhttp:okhttp:2.7.5
com.squareup.okio:okio:1.6.0
com.zaxxer:HikariCP-java7:2.4.12
com.zaxxer:HikariCP:4.0.3
commons-beanutils:commons-beanutils:1.9.3
commons-cli:commons-cli:1.2
commons-codec:commons-codec:1.11
commons-collections:commons-collections:3.2.2
commons-daemon:commons-daemon:1.0.13
commons-io:commons-io:2.5
commons-lang:commons-lang:2.6
commons-io:commons-io:2.8.0
commons-logging:commons-logging:1.1.3
commons-net:commons-net:3.6
de.ruedigermoeller:fst:2.50
@ -283,30 +282,30 @@ javax.inject:javax.inject:1
log4j:log4j:1.2.17
net.java.dev.jna:jna:5.2.0
net.minidev:accessors-smart:1.2
net.minidev:json-smart:2.3
net.minidev:json-smart:2.4.7
org.apache.avro:avro:1.7.7
org.apache.commons:commons-collections4:4.2
org.apache.commons:commons-compress:1.19
org.apache.commons:commons-compress:1.21
org.apache.commons:commons-configuration2:2.1.1
org.apache.commons:commons-csv:1.0
org.apache.commons:commons-digester:1.8.1
org.apache.commons:commons-lang3:3.7
org.apache.commons:commons-lang3:3.12.0
org.apache.commons:commons-math3:3.1.1
org.apache.commons:commons-text:1.4
org.apache.commons:commons-validator:1.6
org.apache.curator:curator-client:2.13.0
org.apache.curator:curator-framework:2.13.0
org.apache.curator:curator-recipes:2.13.0
org.apache.curator:curator-client:5.2.0
org.apache.curator:curator-framework:5.2.0
org.apache.curator:curator-recipes:5.2.0
org.apache.geronimo.specs:geronimo-jcache_1.0_spec:1.0-alpha-1
org.apache.hbase:hbase-annotations:1.4.8
org.apache.hbase:hbase-client:1.4.8
org.apache.hbase:hbase-common:1.4.8
org.apache.hbase:hbase-protocol:1.4.8
org.apache.hbase:hbase-annotations:1.7.1
org.apache.hbase:hbase-client:1.7.1
org.apache.hbase:hbase-common:1.7.1
org.apache.hbase:hbase-protocol:1.7.1
org.apache.htrace:htrace-core:3.1.0-incubating
org.apache.htrace:htrace-core4:4.1.0-incubating
org.apache.httpcomponents:httpclient:4.5.6
org.apache.httpcomponents:httpcore:4.4.10
org.apache.kafka:kafka-clients:2.4.0
org.apache.kafka:kafka-clients:2.8.1
org.apache.kerby:kerb-admin:1.0.1
org.apache.kerby:kerb-client:1.0.1
org.apache.kerby:kerb-common:1.0.1
@ -322,29 +321,30 @@ org.apache.kerby:kerby-pkix:1.0.1
org.apache.kerby:kerby-util:1.0.1
org.apache.kerby:kerby-xdr:1.0.1
org.apache.kerby:token-provider:1.0.1
org.apache.solr:solr-solrj:8.8.2
org.apache.yetus:audience-annotations:0.5.0
org.apache.zookeeper:zookeeper:3.4.13
org.apache.zookeeper:zookeeper:3.6.3
org.codehaus.jackson:jackson-core-asl:1.9.13
org.codehaus.jackson:jackson-jaxrs:1.9.13
org.codehaus.jackson:jackson-mapper-asl:1.9.13
org.codehaus.jackson:jackson-xc:1.9.13
org.codehaus.jettison:jettison:1.1
org.eclipse.jetty:jetty-annotations:9.3.27.v20190418
org.eclipse.jetty:jetty-http:9.3.27.v20190418
org.eclipse.jetty:jetty-io:9.3.27.v20190418
org.eclipse.jetty:jetty-jndi:9.3.27.v20190418
org.eclipse.jetty:jetty-plus:9.3.27.v20190418
org.eclipse.jetty:jetty-security:9.3.27.v20190418
org.eclipse.jetty:jetty-server:9.3.27.v20190418
org.eclipse.jetty:jetty-servlet:9.3.27.v20190418
org.eclipse.jetty:jetty-util:9.3.27.v20190418
org.eclipse.jetty:jetty-util-ajax:9.3.27.v20190418
org.eclipse.jetty:jetty-webapp:9.3.27.v20190418
org.eclipse.jetty:jetty-xml:9.3.27.v20190418
org.eclipse.jetty.websocket:javax-websocket-client-impl:9.3.27.v20190418
org.eclipse.jetty.websocket:javax-websocket-server-impl:9.3.27.v20190418
org.eclipse.jetty:jetty-annotations:9.4.44.v20210927
org.eclipse.jetty:jetty-http:9.4.44.v20210927
org.eclipse.jetty:jetty-io:9.4.44.v20210927
org.eclipse.jetty:jetty-jndi:9.4.44.v20210927
org.eclipse.jetty:jetty-plus:9.4.44.v20210927
org.eclipse.jetty:jetty-security:9.4.44.v20210927
org.eclipse.jetty:jetty-server:9.4.44.v20210927
org.eclipse.jetty:jetty-servlet:9.4.44.v20210927
org.eclipse.jetty:jetty-util:9.4.44.v20210927
org.eclipse.jetty:jetty-util-ajax:9.4.44.v20210927
org.eclipse.jetty:jetty-webapp:9.4.44.v20210927
org.eclipse.jetty:jetty-xml:9.4.44.v20210927
org.eclipse.jetty.websocket:javax-websocket-client-impl:9.4.44.v20210927
org.eclipse.jetty.websocket:javax-websocket-server-impl:9.4.44.v20210927
org.ehcache:ehcache:3.3.1
org.lz4:lz4-java:1.6.0
org.lz4:lz4-java:1.7.1
org.objenesis:objenesis:2.6
org.xerial.snappy:snappy-java:1.0.5
org.yaml:snakeyaml:1.16:
@ -364,9 +364,9 @@ hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/io/com
hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/fuse-dfs/util/tree.h
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/compat/{fstatat|openat|unlinkat}.h
com.github.luben:zstd-jni:1.4.3-1
com.github.luben:zstd-jni:1.4.9-1
dnsjava:dnsjava:2.1.7
org.codehaus.woodstox:stax2-api:3.1.4
org.codehaus.woodstox:stax2-api:4.2.1
BSD 3-Clause
@ -405,7 +405,7 @@ hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/static/dataTables.bootstrap.css
hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/static/dataTables.bootstrap.js
hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/static/dust-full-2.0.0.min.js
hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/static/dust-helpers-1.1.1.min.js
hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/static/jquery-3.5.1.min.js
hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/static/jquery-3.6.0.min.js
hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/static/jquery.dataTables.min.js
hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/static/moment.min.js
hadoop-tools/hadoop-sls/src/main/html/js/thirdparty/bootstrap.min.js
@ -468,8 +468,8 @@ com.microsoft.azure:azure-cosmosdb-gateway:2.4.5
com.microsoft.azure:azure-data-lake-store-sdk:2.3.3
com.microsoft.azure:azure-keyvault-core:1.0.0
com.microsoft.sqlserver:mssql-jdbc:6.2.1.jre7
org.bouncycastle:bcpkix-jdk15on:1.60
org.bouncycastle:bcprov-jdk15on:1.60
org.bouncycastle:bcpkix-jdk15on:1.68
org.bouncycastle:bcprov-jdk15on:1.68
org.checkerframework:checker-qual:2.5.2
org.codehaus.mojo:animal-sniffer-annotations:1.17
org.jruby.jcodings:jcodings:1.0.13
@ -495,6 +495,7 @@ javax.annotation:javax.annotation-api:1.3.2
javax.servlet:javax.servlet-api:3.1.0
javax.servlet.jsp:jsp-api:2.1
javax.websocket:javax.websocket-api:1.0
javax.ws.rs:javax.ws.rs-api:2.1.1
javax.ws.rs:jsr311-api:1.1.1
javax.xml.bind:jaxb-api:2.2.11
@ -502,7 +503,7 @@ javax.xml.bind:jaxb-api:2.2.11
Eclipse Public License 1.0
--------------------------
junit:junit:4.12
junit:junit:4.13.2
HSQL License
@ -514,7 +515,7 @@ org.hsqldb:hsqldb:2.3.4
JDOM License
------------
org.jdom:jdom:1.1
org.jdom:jdom2:2.0.6.1
Public Domain

View File

@ -245,7 +245,7 @@ hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/static/dataTables.bootstrap.css
hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/static/dataTables.bootstrap.js
hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/static/dust-full-2.0.0.min.js
hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/static/dust-helpers-1.1.1.min.js
hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/static/jquery-3.5.1.min.js
hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/static/jquery-3.6.0.min.js
hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/static/jquery.dataTables.min.js
hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/static/moment.min.js
hadoop-tools/hadoop-sls/src/main/html/js/thirdparty/bootstrap.min.js

View File

@ -66,7 +66,7 @@ available from http://www.digip.org/jansson/.
AWS SDK for Java
Copyright 2010-2014 Amazon.com, Inc. or its affiliates. All Rights Reserved.
Copyright 2010-2022 Amazon.com, Inc. or its affiliates. All Rights Reserved.
This product includes software developed by
Amazon Technologies, Inc (http://www.amazon.com/).

338
dev-support/Jenkinsfile vendored Normal file
View File

@ -0,0 +1,338 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
// Returns the credential bindings list handed to withCredentials(): a single
// username/password binding for the 'apache-hadoop-at-github.com' credential,
// exposed to the wrapped steps as GITHUB_USER and GITHUB_TOKEN.
def getGithubCreds() {
    def githubBinding = usernamePassword(
        credentialsId: 'apache-hadoop-at-github.com',
        usernameVariable: 'GITHUB_USER',
        passwordVariable: 'GITHUB_TOKEN')
    return [githubBinding]
}
// Publish JUnit results only if there are XML files under surefire-reports.
//
// Searches ${SOURCEDIR} for */target/surefire-reports/*.xml. When at least one file is
// found, the junit step is run on the matching Ant-style glob; otherwise junit is skipped.
// A failure inside the junit step is logged and swallowed so that result publishing
// never fails the build on its own.
def publishJUnitResults() {
    // The glob is single-quoted (and the directory double-quoted) so that the pattern is
    // interpreted by find itself rather than being expanded or word-split by the shell.
    // "egrep ." turns "find printed at least one line" into exit status 0.
    def findCmdExitCode = sh script: "find \"${SOURCEDIR}\" -wholename '*/target/surefire-reports/*.xml' | egrep .", returnStatus: true
    boolean surefireReportsExist = findCmdExitCode == 0
    if (surefireReportsExist) {
        echo "XML files found under surefire-reports, running junit"
        // The path should be relative to WORKSPACE for the junit.
        // Declared with 'def' so the value stays local instead of leaking into the script binding.
        def surefireGlob = "${SOURCEDIR}/**/target/surefire-reports/*.xml".replace("$WORKSPACE/","")
        try {
            junit surefireGlob
        } catch(e) {
            echo 'junit processing: ' + e.toString()
        }
    } else {
        echo "No XML files found under surefire-reports, skipping junit"
    }
}
pipeline {
agent {
label 'Hadoop'
}
options {
buildDiscarder(logRotator(numToKeepStr: '5'))
timeout (time: 24, unit: 'HOURS')
timestamps()
checkoutToSubdirectory('src')
}
environment {
YETUS='yetus'
// Branch or tag name. Yetus release tags are 'rel/X.Y.Z'
YETUS_VERSION='f9ba0170a5787a5f4662d3769804fef0226a182f'
}
parameters {
string(name: 'JIRA_ISSUE_KEY',
defaultValue: '',
description: 'The JIRA issue that has a patch needing pre-commit testing. Example: HADOOP-1234')
}
stages {
// Checks out Apache Yetus into ${WORKSPACE}/${YETUS} at the revision pinned by
// the YETUS_VERSION environment variable.
stage ('install yetus') {
steps {
dir("${WORKSPACE}/${YETUS}") {
// Explicit GitSCM checkout so that a branch, tag or commit id can be given as the revision.
checkout([
$class: 'GitSCM',
branches: [[name: "${env.YETUS_VERSION}"]],
userRemoteConfigs: [[ url: 'https://github.com/apache/yetus.git']]]
)
}
}
}
// Setup codebase so that each platform's build happens in its own exclusive copy of the
// codebase.
// Primarily because YETUS messes up the git branch information and affects the subsequent
// optional stages after the first one.
//
// Each dir() step enters the per-platform directory and the checked-out sources in
// ${WORKSPACE}/src are copied into it, giving ${WORKSPACE}/<platform>/src.
// All shell expansions are quoted, matching the other shell snippets in this file,
// so that a workspace path containing whitespace is not word-split.
stage ('setup sources') {
    steps {
        dir("${WORKSPACE}/centos-7") {
            sh '''#!/usr/bin/env bash
                cp -Rp "${WORKSPACE}/src" "${WORKSPACE}/centos-7"
            '''
        }
        dir("${WORKSPACE}/centos-8") {
            sh '''#!/usr/bin/env bash
                cp -Rp "${WORKSPACE}/src" "${WORKSPACE}/centos-8"
            '''
        }
        dir("${WORKSPACE}/debian-10") {
            sh '''#!/usr/bin/env bash
                cp -Rp "${WORKSPACE}/src" "${WORKSPACE}/debian-10"
            '''
        }
        dir("${WORKSPACE}/ubuntu-focal") {
            sh '''#!/usr/bin/env bash
                cp -Rp "${WORKSPACE}/src" "${WORKSPACE}/ubuntu-focal"
            '''
        }
    }
}
// This is an optional stage which runs only when there's a change in
// C++/C++ build/platform.
// This stage serves as a means of cross platform validation, which is
// really needed to ensure that any C++ related/platform change doesn't
// break the Hadoop build on Centos 7.
//
// The stage works on the centos-7 copy of the sources and delegates the actual work to
// dev-support/jenkins.sh (run_ci for the build, cleanup_ci_proc afterwards).
stage ('precommit-run Centos 7') {
// Stage-scoped variables pointing at the centos-7 source copy, its output directory and
// its Dockerfile. NOTE(review): presumably these, and IS_OPTIONAL, are read by
// dev-support/jenkins.sh - confirm against that script.
environment {
SOURCEDIR = "${WORKSPACE}/centos-7/src"
PATCHDIR = "${WORKSPACE}/centos-7/out"
DOCKERFILE = "${SOURCEDIR}/dev-support/docker/Dockerfile_centos_7"
IS_OPTIONAL = 1
}
steps {
// GITHUB_USER / GITHUB_TOKEN are bound for the duration of the script.
withCredentials(getGithubCreds()) {
sh '''#!/usr/bin/env bash
chmod u+x "${SOURCEDIR}/dev-support/jenkins.sh"
"${SOURCEDIR}/dev-support/jenkins.sh" run_ci
'''
}
}
post {
// Since this is an optional platform, we want to copy the artifacts
// and archive it only if the build fails, to help with debugging.
failure {
// Copy the output directory to ${WORKSPACE}/out so it can be archived as "out/**".
sh '''#!/usr/bin/env bash
cp -Rp "${WORKSPACE}/centos-7/out" "${WORKSPACE}"
'''
archiveArtifacts "out/**"
}
cleanup() {
script {
sh '''#!/usr/bin/env bash
chmod u+x "${SOURCEDIR}/dev-support/jenkins.sh"
"${SOURCEDIR}/dev-support/jenkins.sh" cleanup_ci_proc
'''
}
}
}
}
// This is an optional stage which runs only when there's a change in
// C++/C++ build/platform.
// This stage serves as a means of cross platform validation, which is
// really needed to ensure that any C++ related/platform change doesn't
// break the Hadoop build on Centos 8.
//
// The stage works on the centos-8 copy of the sources and delegates the actual work to
// dev-support/jenkins.sh (run_ci for the build, cleanup_ci_proc afterwards).
stage ('precommit-run Centos 8') {
// Stage-scoped variables pointing at the centos-8 source copy, its output directory and
// its Dockerfile. NOTE(review): presumably these, and IS_OPTIONAL, are read by
// dev-support/jenkins.sh - confirm against that script.
environment {
SOURCEDIR = "${WORKSPACE}/centos-8/src"
PATCHDIR = "${WORKSPACE}/centos-8/out"
DOCKERFILE = "${SOURCEDIR}/dev-support/docker/Dockerfile_centos_8"
IS_OPTIONAL = 1
}
steps {
// GITHUB_USER / GITHUB_TOKEN are bound for the duration of the script.
withCredentials(getGithubCreds()) {
sh '''#!/usr/bin/env bash
chmod u+x "${SOURCEDIR}/dev-support/jenkins.sh"
"${SOURCEDIR}/dev-support/jenkins.sh" run_ci
'''
}
}
post {
// Since this is an optional platform, we want to copy the artifacts
// and archive it only if the build fails, to help with debugging.
failure {
// Copy the output directory to ${WORKSPACE}/out so it can be archived as "out/**".
sh '''#!/usr/bin/env bash
cp -Rp "${WORKSPACE}/centos-8/out" "${WORKSPACE}"
'''
archiveArtifacts "out/**"
}
cleanup() {
script {
sh '''#!/usr/bin/env bash
chmod u+x "${SOURCEDIR}/dev-support/jenkins.sh"
"${SOURCEDIR}/dev-support/jenkins.sh" cleanup_ci_proc
'''
}
}
}
}
// This is an optional stage which runs only when there's a change in
// C++/C++ build/platform.
// This stage serves as a means of cross platform validation, which is
// really needed to ensure that any C++ related/platform change doesn't
// break the Hadoop build on Debian 10.
//
// The stage works on the debian-10 copy of the sources and delegates the actual work to
// dev-support/jenkins.sh (run_ci for the build, cleanup_ci_proc afterwards).
stage ('precommit-run Debian 10') {
// Stage-scoped variables pointing at the debian-10 source copy, its output directory and
// its Dockerfile. NOTE(review): presumably these, and IS_OPTIONAL, are read by
// dev-support/jenkins.sh - confirm against that script.
environment {
SOURCEDIR = "${WORKSPACE}/debian-10/src"
PATCHDIR = "${WORKSPACE}/debian-10/out"
DOCKERFILE = "${SOURCEDIR}/dev-support/docker/Dockerfile_debian_10"
IS_OPTIONAL = 1
}
steps {
// GITHUB_USER / GITHUB_TOKEN are bound for the duration of the script.
withCredentials(getGithubCreds()) {
sh '''#!/usr/bin/env bash
chmod u+x "${SOURCEDIR}/dev-support/jenkins.sh"
"${SOURCEDIR}/dev-support/jenkins.sh" run_ci
'''
}
}
post {
// Since this is an optional platform, we want to copy the artifacts
// and archive it only if the build fails, to help with debugging.
failure {
// Copy the output directory to ${WORKSPACE}/out so it can be archived as "out/**".
sh '''#!/usr/bin/env bash
cp -Rp "${WORKSPACE}/debian-10/out" "${WORKSPACE}"
'''
archiveArtifacts "out/**"
}
cleanup() {
script {
sh '''#!/usr/bin/env bash
chmod u+x "${SOURCEDIR}/dev-support/jenkins.sh"
"${SOURCEDIR}/dev-support/jenkins.sh" cleanup_ci_proc
'''
}
}
}
}
// We want to use Ubuntu Focal as our main CI and thus, this stage
// isn't optional (runs for all the PRs).
// Stage: runs the precommit CI script against the default (Ubuntu focal) Docker
// image and always publishes its results.
stage ('precommit-run Ubuntu focal') {
// Stage-scoped environment: source and output directories live under
// ${WORKSPACE}/ubuntu-focal; DOCKERFILE is the default dev-support Dockerfile.
// IS_OPTIONAL is 0 here, unlike the Centos 8 and Debian 10 stages.
environment {
SOURCEDIR = "${WORKSPACE}/ubuntu-focal/src"
PATCHDIR = "${WORKSPACE}/ubuntu-focal/out"
DOCKERFILE = "${SOURCEDIR}/dev-support/docker/Dockerfile"
IS_OPTIONAL = 0
}
steps {
// Triple-single-quoted Groovy strings: ${SOURCEDIR} is expanded by bash
// from the environment, not interpolated by Groovy.
withCredentials(getGithubCreds()) {
sh '''#!/usr/bin/env bash
chmod u+x "${SOURCEDIR}/dev-support/jenkins.sh"
"${SOURCEDIR}/dev-support/jenkins.sh" run_ci
'''
}
}
post {
// Unlike the optional platforms, results are copied, archived and published
// on every run, not only on failure.
always {
script {
// Publish status if it was missed (YETUS-1059)
// NOTE(review): the steps above obtain credentials via getGithubCreds(),
// while this block binds a usernamePassword credential inline -- confirm
// whether both are meant to refer to the same credential.
withCredentials(
[usernamePassword(credentialsId: '683f5dcf-5552-4b28-9fb1-6a6b77cf53dd',
passwordVariable: 'GITHUB_TOKEN',
usernameVariable: 'GITHUB_USER')]) {
sh '''#!/usr/bin/env bash
# Copy the artifacts of Ubuntu focal build to workspace
cp -Rp "${WORKSPACE}/ubuntu-focal/out" "${WORKSPACE}"
# Send Github status
chmod u+x "${SOURCEDIR}/dev-support/jenkins.sh"
"${SOURCEDIR}/dev-support/jenkins.sh" github_status_recovery
'''
}
// YETUS output
archiveArtifacts "out/**"
// Publish the HTML report so that it can be looked at
// Has to be relative to WORKSPACE.
publishHTML (target: [
allowMissing: true,
keepAll: true,
alwaysLinkToLastBuild: true,
// Has to be relative to WORKSPACE
reportDir: "out",
reportFiles: 'report.html',
reportName: 'Yetus Report'
])
// NOTE(review): publishJUnitResults() is defined outside this stage;
// presumably it records the JUnit XML produced by the run -- confirm.
publishJUnitResults()
}
}
// Runs jenkins.sh cleanup_ci_proc after the other post conditions of this stage.
cleanup() {
script {
sh '''#!/usr/bin/env bash
chmod u+x "${SOURCEDIR}/dev-support/jenkins.sh"
"${SOURCEDIR}/dev-support/jenkins.sh" cleanup_ci_proc
'''
}
}
}
}
}
// Pipeline-level post section: restores owner permissions on the workspace and
// then deletes it.
post {
// Jenkins pipeline jobs fill slaves on PRs without this :(
cleanup() {
script {
// The chmod runs before deleteDir(); presumably it lets deleteDir() remove
// files left without owner read/write/execute permission (HADOOP-13951 is
// the reference given in the script) -- confirm.
sh '''#!/usr/bin/env bash
# See HADOOP-13951
chmod -R u+rxw "${WORKSPACE}"
'''
deleteDir()
}
}
}
}

View File

@ -1,4 +1,4 @@
#!/usr/bin/env python
#!/usr/bin/env python3
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
@ -30,33 +30,16 @@ import re
import shutil
import subprocess
import sys
import urllib2
try:
import argparse
except ImportError:
sys.stderr.write("Please install argparse, e.g. via `pip install argparse`.")
sys.exit(2)
import urllib.request
import argparse
# Various relative paths
REPO_DIR = os.getcwd()
def check_output(*popenargs, **kwargs):
r"""Run command with arguments and return its output as a byte string.
Backported from Python 2.7 as it's implemented as pure python on stdlib.
>>> check_output(['/usr/bin/python', '--version'])
Python 2.6.2
"""
process = subprocess.Popen(stdout=subprocess.PIPE, *popenargs, **kwargs)
output, _ = process.communicate()
retcode = process.poll()
if retcode:
cmd = kwargs.get("args")
if cmd is None:
cmd = popenargs[0]
error = subprocess.CalledProcessError(retcode, cmd)
error.output = output
raise error
return output
""" Run command with arguments and return its output as a string. """
return subprocess.check_output(*popenargs, **kwargs, encoding='utf-8')
def get_repo_dir():
""" Return the path to the top of the repo. """
@ -139,7 +122,7 @@ def checkout_java_acc(force):
url = "https://github.com/lvc/japi-compliance-checker/archive/1.8.tar.gz"
scratch_dir = get_scratch_dir()
path = os.path.join(scratch_dir, os.path.basename(url))
jacc = urllib2.urlopen(url)
jacc = urllib.request.urlopen(url)
with open(path, 'wb') as w:
w.write(jacc.read())
@ -192,9 +175,9 @@ def run_java_acc(src_name, src_jars, dst_name, dst_jars, annotations):
if annotations is not None:
annotations_path = os.path.join(get_scratch_dir(), "annotations.txt")
with file(annotations_path, "w") as f:
with open(annotations_path, "w") as f:
for ann in annotations:
print >>f, ann
print(ann, file=f)
args += ["-annotations-list", annotations_path]
subprocess.check_call(args)
@ -264,8 +247,8 @@ def main():
parser.add_argument("--skip-build",
action="store_true",
help="Skip building the projects.")
parser.add_argument("src_rev", nargs=1, help="Source revision.")
parser.add_argument("dst_rev", nargs="?", default="HEAD",
parser.add_argument("src_rev", nargs=1, type=str, help="Source revision.")
parser.add_argument("dst_rev", nargs="?", type=str, default="HEAD",
help="Destination revision. " +
"If not specified, will use HEAD.")

View File

@ -514,7 +514,7 @@ function dockermode
echo "USER ${user_name}"
printf "\n\n"
) | docker build -t "${imgname}" -
) | docker build -t "${imgname}" -f - "${BASEDIR}"/dev-support/docker/
run docker run -i -t \
--privileged \

View File

@ -164,7 +164,7 @@ fi
# Windows doesn't have a LIB_DIR, everything goes into bin
if [[ -d "${BIN_DIR}" ]] ; then
if [[ -d "${BIN_DIR}" && $(ls -A "${BIN_DIR}") ]] ; then
mkdir -p "${TARGET_BIN_DIR}"
cd "${BIN_DIR}" || exit 1
${TAR} ./* | (cd "${TARGET_BIN_DIR}"/ || exit 1; ${UNTAR})

View File

@ -355,6 +355,7 @@ function personality_modules
fi
;;
unit)
extra="-Dsurefire.rerunFailingTestsCount=2"
if [[ "${BUILDMODE}" = full ]]; then
ordering=mvnsrc
elif [[ "${CHANGED_MODULES[*]}" =~ \. ]]; then
@ -363,7 +364,7 @@ function personality_modules
if [[ ${TEST_PARALLEL} = "true" ]] ; then
if hadoop_test_parallel; then
extra="-Pparallel-tests"
extra="${extra} -Pparallel-tests"
if [[ -n ${TEST_THREADS:-} ]]; then
extra="${extra} -DtestsThreadCount=${TEST_THREADS}"
fi
@ -482,7 +483,7 @@ function personality_file_tests
fi
if [[ ${filename} =~ \.java$ ]]; then
add_test findbugs
add_test spotbugs
fi
}
@ -512,7 +513,7 @@ function shadedclient_initialize
maven_add_install shadedclient
}
## @description build client facing shaded artifacts and test them
## @description build client facing shaded and non-shaded artifacts and test them
## @audience private
## @stability evolving
## @param repostatus
@ -545,12 +546,19 @@ function shadedclient_rebuild
return 0
fi
big_console_header "Checking client artifacts on ${repostatus}"
big_console_header "Checking client artifacts on ${repostatus} with shaded clients"
echo_and_redirect "${logfile}" \
"${MAVEN}" "${MAVEN_ARGS[@]}" verify -fae --batch-mode -am \
"${modules[@]}" \
-Dtest=NoUnitTests -Dmaven.javadoc.skip=true -Dcheckstyle.skip=true -Dfindbugs.skip=true
-Dtest=NoUnitTests -Dmaven.javadoc.skip=true -Dcheckstyle.skip=true -Dspotbugs.skip=true
big_console_header "Checking client artifacts on ${repostatus} with non-shaded clients"
echo_and_redirect "${logfile}" \
"${MAVEN}" "${MAVEN_ARGS[@]}" verify -fae --batch-mode -am \
"${modules[@]}" \
-DskipShade -Dtest=NoUnitTests -Dmaven.javadoc.skip=true -Dcheckstyle.skip=true -Dspotbugs.skip=true
count=$("${GREP}" -c '\[ERROR\]' "${logfile}")
if [[ ${count} -gt 0 ]]; then

View File

@ -15,4 +15,4 @@
# limitations under the License.
BINDIR=$(cd -P -- "$(dirname -- "${BASH_SOURCE-0}")" >/dev/null && pwd -P)
exec "${BINDIR}/yetus-wrapper" test-patch --project=hadoop --skip-dir=dev-support "$@"
exec "${BINDIR}/yetus-wrapper" test-patch --project=hadoop --skip-dirs=dev-support "$@"

View File

@ -77,7 +77,7 @@ WANTED="$1"
shift
ARGV=("$@")
HADOOP_YETUS_VERSION=${HADOOP_YETUS_VERSION:-0.10.0}
HADOOP_YETUS_VERSION=${HADOOP_YETUS_VERSION:-0.13.0}
BIN=$(yetus_abs "${BASH_SOURCE-$0}")
BINDIR=$(dirname "${BIN}")

View File

@ -0,0 +1,75 @@
<!--
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<code_scheme name="Hadoop" version="173">
<option name="RIGHT_MARGIN" value="100" />
<JavaCodeStyleSettings>
<!--
Import-on-demand thresholds are set to 999 and the list of packages that
always use on-demand imports is empty. NOTE(review): the intent is presumably
that the IDE never collapses imports into wildcard ("*") imports; confirm
against the IntelliJ code style documentation.
-->
<option name="CLASS_COUNT_TO_USE_IMPORT_ON_DEMAND" value="999" />
<option name="NAMES_COUNT_TO_USE_IMPORT_ON_DEMAND" value="999" />
<option name="PACKAGES_TO_USE_IMPORT_ON_DEMAND">
<value />
</option>
<!--
Import layout, in order: java and javax; a blank line;
org.apache.hadoop.thirdparty followed by all other non-static imports;
a blank line; org.apache; a blank line; all static imports.
-->
<option name="IMPORT_LAYOUT_TABLE">
<value>
<package name="java" withSubpackages="true" static="false" />
<package name="javax" withSubpackages="true" static="false" />
<emptyLine />
<package name="org.apache.hadoop.thirdparty" withSubpackages="true" static="false" />
<package name="" withSubpackages="true" static="false" />
<emptyLine />
<package name="org.apache" withSubpackages="true" static="false" />
<emptyLine />
<package name="" withSubpackages="true" static="true" />
</value>
</option>
</JavaCodeStyleSettings>
<codeStyleSettings language="JAVA">
<option name="RIGHT_MARGIN" value="100" />
<option name="KEEP_LINE_BREAKS" value="false" />
<option name="KEEP_FIRST_COLUMN_COMMENT" value="false" />
<option name="KEEP_CONTROL_STATEMENT_IN_ONE_LINE" value="false" />
<option name="KEEP_BLANK_LINES_IN_DECLARATIONS" value="1" />
<option name="KEEP_BLANK_LINES_IN_CODE" value="1" />
<option name="KEEP_BLANK_LINES_BEFORE_RBRACE" value="1" />
<option name="INDENT_CASE_FROM_SWITCH" value="false" />
<option name="ALIGN_MULTILINE_PARAMETERS" value="false" />
<option name="ALIGN_MULTILINE_RESOURCES" value="false" />
<option name="SPACE_BEFORE_ARRAY_INITIALIZER_LBRACE" value="true" />
<option name="CALL_PARAMETERS_WRAP" value="1" />
<option name="METHOD_PARAMETERS_WRAP" value="1" />
<option name="RESOURCE_LIST_WRAP" value="5" />
<option name="EXTENDS_LIST_WRAP" value="1" />
<option name="THROWS_LIST_WRAP" value="1" />
<option name="EXTENDS_KEYWORD_WRAP" value="1" />
<option name="THROWS_KEYWORD_WRAP" value="1" />
<option name="METHOD_CALL_CHAIN_WRAP" value="1" />
<option name="BINARY_OPERATION_WRAP" value="1" />
<option name="BINARY_OPERATION_SIGN_ON_NEXT_LINE" value="true" />
<option name="TERNARY_OPERATION_WRAP" value="5" />
<option name="ARRAY_INITIALIZER_WRAP" value="1" />
<option name="ASSIGNMENT_WRAP" value="1" />
<option name="METHOD_ANNOTATION_WRAP" value="2" />
<option name="CLASS_ANNOTATION_WRAP" value="2" />
<option name="FIELD_ANNOTATION_WRAP" value="2" />
<option name="VARIABLE_ANNOTATION_WRAP" value="2" />
<indentOptions>
<option name="INDENT_SIZE" value="2" />
<option name="CONTINUATION_INDENT_SIZE" value="4" />
<option name="TAB_SIZE" value="2" />
</indentOptions>
</codeStyleSettings>
</code_scheme>

View File

@ -1,245 +0,0 @@
#!/usr/bin/env python
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Given a jenkins test job, this script examines all runs of the job done
# within specified period of time (number of days prior to the execution
# time of this script), and reports all failed tests.
#
# The output of this script includes a section for each run that has failed
# tests, with each failed test name listed.
#
# More importantly, at the end, it outputs a summary section to list all failed
# tests within all examined runs, and indicate how many runs a same test
# failed, and sorted all failed tests by how many runs each test failed.
#
# This way, when we see failed tests in PreCommit build, we can quickly tell
# whether a failed test is a new failure, or it failed before and how often it
# failed, so to have idea whether it may just be a flaky test.
#
# Of course, to be 100% sure about the reason of a test failure, closer look
# at the failed test for the specific run is necessary.
#
import sys
import platform
sysversion = sys.hexversion
onward30 = False
if sysversion < 0x020600F0:
sys.exit("Minimum supported python version is 2.6, the current version is " +
"Python" + platform.python_version())
if sysversion == 0x030000F0:
sys.exit("There is a known bug with Python" + platform.python_version() +
", please try a different version");
if sysversion < 0x03000000:
import urllib2
else:
onward30 = True
import urllib.request
import datetime
import json as simplejson
import logging
from optparse import OptionParser
import time
# Configuration
DEFAULT_JENKINS_URL = "https://builds.apache.org"
DEFAULT_JOB_NAME = "Hadoop-Common-trunk"
DEFAULT_NUM_PREVIOUS_DAYS = 14
DEFAULT_TOP_NUM_FAILED_TEST = -1
SECONDS_PER_DAY = 86400
# total number of runs to examine
numRunsToExamine = 0
#summary mode
summary_mode = False
#total number of errors
error_count = 0
""" Parse arguments """
def parse_args():
    """Parse the command line and return the optparse options object.

    Recognised options are -J/--jenkins-url, -j/--job-name, -n/--num-days and
    -t/--top, each defaulting to the matching DEFAULT_* module constant.
    Any positional argument is rejected through parser.error().
    """
    # Each entry is (option flags, keyword settings) for parser.add_option.
    option_table = [
        (("-J", "--jenkins-url"),
         dict(type="string", dest="jenkins_url", help="Jenkins URL",
              default=DEFAULT_JENKINS_URL)),
        (("-j", "--job-name"),
         dict(type="string", dest="job_name", help="Job name to look at",
              default=DEFAULT_JOB_NAME)),
        (("-n", "--num-days"),
         dict(type="int", dest="num_prev_days",
              help="Number of days to examine",
              default=DEFAULT_NUM_PREVIOUS_DAYS)),
        (("-t", "--top"),
         dict(type="int", dest="num_failed_tests",
              help="Summary Mode, only show top number of failed tests",
              default=DEFAULT_TOP_NUM_FAILED_TEST)),
    ]
    parser = OptionParser()
    for flags, settings in option_table:
        parser.add_option(*flags, **settings)

    options, positional = parser.parse_args()
    if positional:
        parser.error("unexpected arguments: " + repr(positional))
    return options
""" Load data from specified url """
def load_url_data(url):
    """Fetch url and return its body parsed as JSON (strict=False).

    On Python 3 (onward30 is true) the body is decoded with the charset
    announced in the response headers; when the server announces none,
    UTF-8 is used instead of passing None to decode(), which would raise
    TypeError. On Python 2 the response object is handed to the JSON
    loader directly. The response is closed in both cases, including when
    reading or parsing fails.
    """
    if onward30:
        ourl = urllib.request.urlopen(url)
        try:
            # get_param() returns None when the Content-Type has no charset.
            codec = ourl.info().get_param('charset') or 'utf-8'
            content = ourl.read().decode(codec)
        finally:
            ourl.close()
        data = simplejson.loads(content, strict=False)
    else:
        ourl = urllib2.urlopen(url)
        try:
            data = simplejson.load(ourl, strict=False)
        finally:
            ourl.close()
    return data
""" List all builds of the target project. """
def list_builds(jenkins_url, job_name):
    """Return the 'builds' list from the Jenkins JSON API for job_name.

    Each entry carries the url, result and timestamp fields requested in
    the query. If the fetch fails, the error is logged (unless in summary
    mode), the module-level error_count is incremented, and the original
    exception is re-raised.
    """
    global summary_mode
    # Without this declaration the "+=" below makes error_count a local name,
    # so the handler would raise UnboundLocalError and hide the real failure.
    global error_count
    url = "%(jenkins)s/job/%(job_name)s/api/json?tree=builds[url,result,timestamp]" % dict(
        jenkins=jenkins_url,
        job_name=job_name)
    try:
        data = load_url_data(url)
    except:
        # Bare except is deliberate: the exception is always re-raised.
        if not summary_mode:
            logging.error("Could not fetch: %s" % url)
        error_count += 1
        raise
    return data['builds']
""" Find the names of any tests which failed in the given build output URL. """
def find_failing_tests(testReportApiJson, jobConsoleOutput):
    """Return the set of "className.name" strings for failed test cases.

    A case counts as failed when its status is REGRESSION or FAILED, or when
    its errorDetails is not None. If the test report cannot be loaded, the
    module-level error_count is incremented and an empty set is returned.
    jobConsoleOutput is only used in log messages.
    """
    global summary_mode
    global error_count
    failed = set()
    try:
        report = load_url_data(testReportApiJson)
    except:
        if not summary_mode:
            logging.error(" Could not open testReport, check " +
                          jobConsoleOutput + " for why it was reported failed")
        error_count += 1
        return failed

    for suite in report['suites']:
        for case in suite['cases']:
            outcome = case['status']
            details = case['errorDetails']
            if outcome in ('REGRESSION', 'FAILED') or details is not None:
                failed.add(case['className'] + "." + case['name'])

    if not failed and not summary_mode:
        logging.info(" No failed tests in testReport, check " +
                     jobConsoleOutput + " for why it was reported failed.")
    return failed
""" Iterate runs of specfied job within num_prev_days and collect results """
def find_flaky_tests(jenkins_url, job_name, num_prev_days):
    """Count, per test name, how many recent failed builds it failed in.

    Builds are taken from list_builds(), limited to those whose timestamp
    (milliseconds) falls within the last num_prev_days days, and then to
    those whose result is UNSTABLE or FAILURE. The module-level
    numRunsToExamine is set to the number of builds inside the time window.
    Returns a dict mapping test name to failure count.
    """
    global numRunsToExamine
    global summary_mode
    failure_counts = dict()

    # Fetch first, then compute the cutoff, as the time window is relative
    # to the moment after the build list has been retrieved.
    every_build = list_builds(jenkins_url, job_name)
    cutoff = int(time.time()) - SECONDS_PER_DAY * num_prev_days

    recent = []
    for build in every_build:
        if (int(build['timestamp']) / 1000) > cutoff:
            recent.append(build)

    failed_runs = []
    for build in recent:
        if build['result'] in ('UNSTABLE', 'FAILURE'):
            failed_runs.append((build['url'], build['timestamp']))

    total = len(recent)
    failed_total = len(failed_runs)
    numRunsToExamine = total
    if not summary_mode:
        if failed_total > 0:
            ending = ", as listed below:\n"
        else:
            ending = "."
        logging.info(" THERE ARE " + str(failed_total) + " builds (out of " + str(total)
                     + ") that have failed tests in the past " + str(num_prev_days) + " days"
                     + ending)

    for build_url, stamp_ms in failed_runs:
        console_url = build_url + "Console"
        report_url = build_url + "testReport"
        report_api_url = report_url + "/api/json"
        started = datetime.datetime.fromtimestamp(
            float(stamp_ms) / 1000.).strftime('%Y-%m-%d %H:%M:%S')
        if not summary_mode:
            logging.info("===>%s" % str(report_url) + " (" + started + ")")
        failing = find_failing_tests(report_api_url, console_url)
        if not failing:
            continue
        for name in failing:
            if not summary_mode:
                logging.info(" Failed test: %s" % name)
            failure_counts[name] = failure_counts.get(name, 0) + 1
    return failure_counts
def main():
    """Entry point: report tests that failed in recent runs of a Jenkins job.

    Routes log output to stdout, parses the command line, collects the
    failing tests via find_flaky_tests() and logs them sorted by how many
    runs each failed in. Passing a value other than -1 for the top option
    switches on summary mode, which stops after that many lines. Exits with
    status 0 when no failing tests were found.
    """
    global numRunsToExamine
    global summary_mode
    logging.basicConfig(format='%(levelname)s:%(message)s', level=logging.INFO)

    # Swap the root logger's first handler for one that writes to stdout.
    stdout_handler = logging.StreamHandler(sys.stdout)
    stdout_handler.setLevel(logging.INFO)
    root_logger = logging.getLogger()
    root_logger.removeHandler(root_logger.handlers[0])
    root_logger.addHandler(stdout_handler)

    opts = parse_args()
    logging.info("****Recently FAILED builds in url: " + opts.jenkins_url
                 + "/job/" + opts.job_name + "")

    if opts.num_failed_tests != -1:
        summary_mode = True

    all_failing = find_flaky_tests(opts.jenkins_url, opts.job_name,
                                   opts.num_prev_days)
    if len(all_failing) == 0:
        raise SystemExit(0)

    truncated = summary_mode and opts.num_failed_tests < len(all_failing)
    if truncated:
        logging.info("\nAmong " + str(numRunsToExamine) +
                     " runs examined, top " + str(opts.num_failed_tests) +
                     " failed tests <#failedRuns: testName>:")
    else:
        logging.info("\nAmong " + str(numRunsToExamine) +
                     " runs examined, all failed tests <#failedRuns: testName>:")

    # Summary section: tests ordered by number of failed runs, highest first.
    ranked = sorted(all_failing, key=all_failing.get, reverse=True)
    for position, test_name in enumerate(ranked, 1):
        logging.info(" " + str(all_failing[test_name])+ ": " + test_name)
        if summary_mode and position == opts.num_failed_tests:
            break

    if summary_mode and error_count > 0:
        logging.info("\n" + str(error_count) + " errors found, you may "
                     + "re-run in non summary mode to see error details.");
if __name__ == "__main__":
main()

View File

@ -1,4 +1,3 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
@ -18,7 +17,7 @@
# Dockerfile for installing the necessary dependencies for building Hadoop.
# See BUILDING.txt.
FROM ubuntu:bionic
FROM ubuntu:focal
WORKDIR /root
@ -33,162 +32,69 @@ RUN echo APT::Install-Suggests "0"\; >> /etc/apt/apt.conf.d/10disableextras
ENV DEBIAN_FRONTEND noninteractive
ENV DEBCONF_TERSE true
# hadolint ignore=DL3008
######
# Platform package dependency resolver
######
COPY pkg-resolver pkg-resolver
RUN chmod a+x pkg-resolver/*.sh pkg-resolver/*.py \
&& chmod a+r pkg-resolver/*.json
######
# Install packages from apt
######
# hadolint ignore=DL3008,SC2046
RUN apt-get -q update \
&& apt-get -q install -y --no-install-recommends \
ant \
apt-utils \
bats \
build-essential \
bzip2 \
clang \
cmake \
curl \
doxygen \
findbugs \
fuse \
g++ \
gcc \
git \
gnupg-agent \
libbcprov-java \
libbz2-dev \
libcurl4-openssl-dev \
libfuse-dev \
libprotobuf-dev \
libprotoc-dev \
libsasl2-dev \
libsnappy-dev \
libssl-dev \
libtool \
libzstd-dev \
locales \
make \
maven \
openjdk-11-jdk \
openjdk-8-jdk \
pinentry-curses \
pkg-config \
python \
python2.7 \
python-pip \
python-pkg-resources \
python-setuptools \
python-wheel \
rsync \
shellcheck \
software-properties-common \
sudo \
valgrind \
zlib1g-dev \
&& apt-get -q install -y --no-install-recommends python3 \
&& apt-get -q install -y --no-install-recommends $(pkg-resolver/resolve.py ubuntu:focal) \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/*
RUN locale-gen en_US.UTF-8
ENV LANG='en_US.UTF-8' LANGUAGE='en_US:en' LC_ALL='en_US.UTF-8'
ENV PYTHONIOENCODING=utf-8
######
# Set env vars required to build Hadoop
######
ENV MAVEN_HOME /usr
# JAVA_HOME must be set in Maven >= 3.5.0 (MNG-6003)
ENV JAVA_HOME /usr/lib/jvm/java-8-openjdk-amd64
ENV FINDBUGS_HOME /usr
#######
# Install Boost 1.72 (1.65 ships with Bionic)
# Set env vars for SpotBugs 4.2.2
#######
# hadolint ignore=DL3003
RUN mkdir -p /opt/boost-library \
&& curl -L https://sourceforge.net/projects/boost/files/boost/1.72.0/boost_1_72_0.tar.bz2/download > boost_1_72_0.tar.bz2 \
&& mv boost_1_72_0.tar.bz2 /opt/boost-library \
&& cd /opt/boost-library \
&& tar --bzip2 -xf boost_1_72_0.tar.bz2 \
&& cd /opt/boost-library/boost_1_72_0 \
&& ./bootstrap.sh --prefix=/usr/ \
&& ./b2 --without-python install \
&& cd /root \
&& rm -rf /opt/boost-library
ENV SPOTBUGS_HOME /opt/spotbugs
######
# Install Google Protobuf 3.7.1 (3.0.0 ships with Bionic)
######
# hadolint ignore=DL3003
RUN mkdir -p /opt/protobuf-src \
&& curl -L -s -S \
https://github.com/protocolbuffers/protobuf/releases/download/v3.7.1/protobuf-java-3.7.1.tar.gz \
-o /opt/protobuf.tar.gz \
&& tar xzf /opt/protobuf.tar.gz --strip-components 1 -C /opt/protobuf-src \
&& cd /opt/protobuf-src \
&& ./configure --prefix=/opt/protobuf \
&& make install \
&& cd /root \
&& rm -rf /opt/protobuf-src
#######
# Set env vars for Google Protobuf 3.7.1
#######
ENV PROTOBUF_HOME /opt/protobuf
ENV PATH "${PATH}:/opt/protobuf/bin"
####
# Install pylint at fixed version (2.0.0 removed python2 support)
# https://github.com/PyCQA/pylint/issues/2294
####
RUN pip2 install \
astroid==1.6.6 \
isort==4.3.21 \
configparser==4.0.2 \
pylint==1.9.2
####
# Install dateutil.parser
####
RUN pip2 install python-dateutil==2.7.3
###
# Install node.js 10.x for web UI framework (4.2.6 ships with Xenial)
###
# hadolint ignore=DL3008
RUN curl -L -s -S https://deb.nodesource.com/setup_10.x | bash - \
&& apt-get install -y --no-install-recommends nodejs \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/* \
&& npm install -g bower@1.8.8
###
## Install Yarn 1.12.1 for web UI framework
####
RUN curl -s -S https://dl.yarnpkg.com/debian/pubkey.gpg | apt-key add - \
&& echo 'deb https://dl.yarnpkg.com/debian/ stable main' > /etc/apt/sources.list.d/yarn.list \
&& apt-get -q update \
&& apt-get install -y --no-install-recommends yarn=1.21.1-1 \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/*
###
# Install hadolint
####
RUN curl -L -s -S \
https://github.com/hadolint/hadolint/releases/download/v1.11.1/hadolint-Linux-x86_64 \
-o /bin/hadolint \
&& chmod a+rx /bin/hadolint \
&& shasum -a 512 /bin/hadolint | \
awk '$1!="734e37c1f6619cbbd86b9b249e69c9af8ee1ea87a2b1ff71dccda412e9dac35e63425225a95d71572091a3f0a11e9a04c2fc25d9e91b840530c26af32b9891ca" {exit(1)}'
###
# Avoid out of memory errors in builds
###
ENV MAVEN_OPTS -Xms256m -Xmx1536m
ENV MAVEN_OPTS -Xms256m -Xmx3072m
# Skip gpg verification when downloading Yetus via yetus-wrapper
ENV HADOOP_SKIP_YETUS_VERIFICATION true
####
# Install packages
####
RUN pkg-resolver/install-common-pkgs.sh
RUN pkg-resolver/install-spotbugs.sh ubuntu:focal
RUN pkg-resolver/install-boost.sh ubuntu:focal
RUN pkg-resolver/install-protobuf.sh ubuntu:focal
RUN pkg-resolver/install-hadolint.sh ubuntu:focal
RUN pkg-resolver/install-intel-isa-l.sh ubuntu:focal
###
# Everything past this point is either not needed for testing or breaks Yetus.
# So tell Yetus not to read the rest of the file:
# YETUS CUT HERE
###
# Hugo static website generator for new hadoop site
RUN curl -L -o hugo.deb https://github.com/gohugoio/hugo/releases/download/v0.58.3/hugo_0.58.3_Linux-64bit.deb \
&& dpkg --install hugo.deb \
&& rm hugo.deb
# Add a welcome message and environment checks.
COPY hadoop_env_checks.sh /root/hadoop_env_checks.sh
RUN chmod 755 /root/hadoop_env_checks.sh

View File

@ -17,7 +17,7 @@
# Dockerfile for installing the necessary dependencies for building Hadoop.
# See BUILDING.txt.
FROM ubuntu:bionic
FROM ubuntu:focal
WORKDIR /root
@ -33,146 +33,44 @@ ENV DEBIAN_FRONTEND noninteractive
ENV DEBCONF_TERSE true
######
# Install common dependencies from packages. Versions here are either
# sufficient or irrelevant.
# Platform package dependency resolver
######
# hadolint ignore=DL3008
COPY pkg-resolver pkg-resolver
RUN chmod a+x pkg-resolver/*.sh pkg-resolver/*.py \
&& chmod a+r pkg-resolver/*.json
######
# Install packages from apt
######
# hadolint ignore=DL3008,SC2046
RUN apt-get -q update \
&& apt-get -q install -y --no-install-recommends \
ant \
apt-utils \
bats \
build-essential \
bzip2 \
clang \
cmake \
curl \
doxygen \
findbugs \
fuse \
g++ \
gcc \
git \
gnupg-agent \
libbcprov-java \
libbz2-dev \
libcurl4-openssl-dev \
libfuse-dev \
libprotobuf-dev \
libprotoc-dev \
libsasl2-dev \
libsnappy-dev \
libssl-dev \
libtool \
libzstd-dev \
locales \
make \
maven \
openjdk-11-jdk \
openjdk-8-jdk \
pinentry-curses \
pkg-config \
python \
python2.7 \
python-pip \
python-pkg-resources \
python-setuptools \
python-wheel \
rsync \
shellcheck \
software-properties-common \
sudo \
valgrind \
zlib1g-dev \
&& apt-get -q install -y --no-install-recommends python3 \
&& apt-get -q install -y --no-install-recommends $(pkg-resolver/resolve.py ubuntu:focal::arch64) \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/*
RUN locale-gen en_US.UTF-8
ENV LANG='en_US.UTF-8' LANGUAGE='en_US:en' LC_ALL='en_US.UTF-8'
ENV PYTHONIOENCODING=utf-8
######
# Set env vars required to build Hadoop
######
ENV MAVEN_HOME /usr
# JAVA_HOME must be set in Maven >= 3.5.0 (MNG-6003)
ENV JAVA_HOME /usr/lib/jvm/java-8-openjdk-arm64
ENV FINDBUGS_HOME /usr
#######
# Install Boost 1.72 (1.65 ships with Bionic)
# Set env vars for SpotBugs 4.2.2
#######
# hadolint ignore=DL3003
RUN mkdir -p /opt/boost-library \
&& curl -L https://sourceforge.net/projects/boost/files/boost/1.72.0/boost_1_72_0.tar.bz2/download > boost_1_72_0.tar.bz2 \
&& mv boost_1_72_0.tar.bz2 /opt/boost-library \
&& cd /opt/boost-library \
&& tar --bzip2 -xf boost_1_72_0.tar.bz2 \
&& cd /opt/boost-library/boost_1_72_0 \
&& ./bootstrap.sh --prefix=/usr/ \
&& ./b2 --without-python install \
&& cd /root \
&& rm -rf /opt/boost-library
ENV SPOTBUGS_HOME /opt/spotbugs
######
# Install Google Protobuf 3.7.1 (3.0.0 ships with Bionic)
######
# hadolint ignore=DL3003
RUN mkdir -p /opt/protobuf-src \
&& curl -L -s -S \
https://github.com/protocolbuffers/protobuf/releases/download/v3.7.1/protobuf-java-3.7.1.tar.gz \
-o /opt/protobuf.tar.gz \
&& tar xzf /opt/protobuf.tar.gz --strip-components 1 -C /opt/protobuf-src \
&& cd /opt/protobuf-src \
&& ./configure --prefix=/opt/protobuf \
&& make install \
&& cd /root \
&& rm -rf /opt/protobuf-src
#######
# Set env vars for Google Protobuf 3.7.1
#######
ENV PROTOBUF_HOME /opt/protobuf
ENV PATH "${PATH}:/opt/protobuf/bin"
####
# Install pylint at fixed version (2.0.0 removed python2 support)
# https://github.com/PyCQA/pylint/issues/2294
####
RUN pip2 install \
astroid==1.6.6 \
isort==4.3.21 \
configparser==4.0.2 \
pylint==1.9.2
####
# Install dateutil.parser
####
RUN pip2 install python-dateutil==2.7.3
###
# Install node.js 10.x for web UI framework (4.2.6 ships with Xenial)
###
# hadolint ignore=DL3008
RUN curl -L -s -S https://deb.nodesource.com/setup_10.x | bash - \
&& apt-get install -y --no-install-recommends nodejs \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/* \
&& npm install -g bower@1.8.8
###
## Install Yarn 1.12.1 for web UI framework
####
RUN curl -s -S https://dl.yarnpkg.com/debian/pubkey.gpg | apt-key add - \
&& echo 'deb https://dl.yarnpkg.com/debian/ stable main' > /etc/apt/sources.list.d/yarn.list \
&& apt-get -q update \
&& apt-get install -y --no-install-recommends yarn=1.21.1-1 \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/*
###
# Install phantomjs built for aarch64
####
RUN mkdir -p /opt/phantomjs \
&& curl -L -s -S \
https://github.com/liusheng/phantomjs/releases/download/2.1.1/phantomjs-2.1.1-linux-aarch64.tar.bz2 \
-o /opt/phantomjs/phantomjs-2.1.1-linux-aarch64.tar.bz2 \
&& tar xvjf /opt/phantomjs/phantomjs-2.1.1-linux-aarch64.tar.bz2 --strip-components 1 -C /opt/phantomjs \
&& cp /opt/phantomjs/bin/phantomjs /usr/bin/ \
&& rm -rf /opt/phantomjs
###
# Avoid out of memory errors in builds
###
@ -181,18 +79,23 @@ ENV MAVEN_OPTS -Xms256m -Xmx1536m
# Skip gpg verification when downloading Yetus via yetus-wrapper
ENV HADOOP_SKIP_YETUS_VERIFICATION true
# Force PhantomJS to be in 'headless' mode, do not connect to Xwindow
ENV QT_QPA_PLATFORM offscreen
####
# Install packages
####
RUN pkg-resolver/install-common-pkgs.sh
RUN pkg-resolver/install-spotbugs.sh ubuntu:focal::arch64
RUN pkg-resolver/install-boost.sh ubuntu:focal::arch64
RUN pkg-resolver/install-protobuf.sh ubuntu:focal::arch64
###
# Everything past this point is either not needed for testing or breaks Yetus.
# So tell Yetus not to read the rest of the file:
# YETUS CUT HERE
###
# Hugo static website generator (for new hadoop site docs)
RUN curl -L -o hugo.deb https://github.com/gohugoio/hugo/releases/download/v0.58.3/hugo_0.58.3_Linux-ARM64.deb \
&& dpkg --install hugo.deb \
&& rm hugo.deb
# Add a welcome message and environment checks.
COPY hadoop_env_checks.sh /root/hadoop_env_checks.sh
RUN chmod 755 /root/hadoop_env_checks.sh

View File

@ -0,0 +1,96 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Dockerfile for installing the necessary dependencies for building Hadoop.
# See BUILDING.txt.
# Build environment image for Hadoop on CentOS 7.
FROM centos:7
WORKDIR /root
# Run RUN commands under bash with pipefail so that a failure anywhere in a
# pipeline fails the build step.
SHELL ["/bin/bash", "-o", "pipefail", "-c"]
######
# Platform package dependency resolver
######
COPY pkg-resolver pkg-resolver
RUN chmod a+x pkg-resolver/*.sh pkg-resolver/*.py \
&& chmod a+r pkg-resolver/*.json
######
# Install packages from yum
######
# python3 is installed before resolve.py is invoked in the same chain; the
# package list passed to the last yum install is whatever resolve.py prints
# for centos:7.
# NOTE(review): DL3008 is hadolint's apt-get version-pinning rule; the yum
# counterpart appears to be DL3033 -- confirm which one is intended here.
# hadolint ignore=DL3008,SC2046
RUN yum update -y \
&& yum groupinstall -y "Development Tools" \
&& yum install -y \
centos-release-scl \
python3 \
&& yum install -y $(pkg-resolver/resolve.py centos:7)
# Set GCC 9 as the default C/C++ compiler
RUN echo "source /opt/rh/devtoolset-9/enable" >> /etc/bashrc
# Subsequent RUN commands use a login shell.
# NOTE(review): presumably so that shell startup files (and thereby the
# devtoolset-9 enable line appended to /etc/bashrc above) are loaded -- confirm.
SHELL ["/bin/bash", "--login", "-c"]
######
# Set the environment variables needed for CMake
# to find and use GCC 9 for compilation
######
# NOTE(review): LD_LIBRARY_PATH lists ${GCC_HOME}/root/usr/lib64 and
# ${GCC_HOME}/root/usr/lib twice; the repetition looks redundant.
ENV GCC_HOME "/opt/rh/devtoolset-9"
ENV CC "${GCC_HOME}/root/usr/bin/gcc"
ENV CXX "${GCC_HOME}/root/usr/bin/g++"
ENV SHLVL 1
ENV LD_LIBRARY_PATH "${GCC_HOME}/root/usr/lib64:${GCC_HOME}/root/usr/lib:${GCC_HOME}/root/usr/lib64/dyninst:${GCC_HOME}/root/usr/lib/dyninst:${GCC_HOME}/root/usr/lib64:${GCC_HOME}/root/usr/lib:/usr/lib:/usr/lib64"
ENV PCP_DIR "${GCC_HOME}/root"
ENV MANPATH "${GCC_HOME}/root/usr/share/man:"
ENV PATH "${GCC_HOME}/root/usr/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin"
ENV PKG_CONFIG_PATH "${GCC_HOME}/root/usr/lib64/pkgconfig"
ENV INFOPATH "${GCC_HOME}/root/usr/share/info"
# TODO: Set locale
######
# Set env vars required to build Hadoop
######
# PATH is extended here (Maven) and again below (Protobuf); both append to the
# value set in the GCC section above.
ENV MAVEN_HOME /opt/maven
ENV PATH "${PATH}:${MAVEN_HOME}/bin"
# JAVA_HOME must be set in Maven >= 3.5.0 (MNG-6003)
ENV JAVA_HOME /usr/lib/jvm/java-1.8.0
#######
# Set env vars for SpotBugs
#######
ENV SPOTBUGS_HOME /opt/spotbugs
#######
# Set env vars for Google Protobuf
#######
ENV PROTOBUF_HOME /opt/protobuf
ENV PATH "${PATH}:/opt/protobuf/bin"
######
# Install packages
######
# Each installer script from pkg-resolver runs as its own RUN step; all but
# install-common-pkgs.sh receive the platform identifier centos:7.
RUN pkg-resolver/install-maven.sh centos:7
RUN pkg-resolver/install-cmake.sh centos:7
RUN pkg-resolver/install-zstandard.sh centos:7
RUN pkg-resolver/install-yasm.sh centos:7
RUN pkg-resolver/install-protobuf.sh centos:7
RUN pkg-resolver/install-boost.sh centos:7
RUN pkg-resolver/install-spotbugs.sh centos:7
RUN pkg-resolver/install-nodejs.sh centos:7
RUN pkg-resolver/install-git.sh centos:7
RUN pkg-resolver/install-common-pkgs.sh

View File

@ -0,0 +1,118 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Dockerfile for installing the necessary dependencies for building Hadoop.
# See BUILDING.txt.
FROM centos:8

WORKDIR /root

# Make a RUN step fail when any command of a pipeline fails, not only the last one.
SHELL ["/bin/bash", "-o", "pipefail", "-c"]

######
# Platform package dependency resolver
######
COPY pkg-resolver pkg-resolver
RUN chmod a+x pkg-resolver/*.sh pkg-resolver/*.py \
    && chmod a+r pkg-resolver/*.json

######
# Centos 8 has reached its EOL and the packages
# are no longer available on mirror.centos.org site.
# Please see https://www.centos.org/centos-linux-eol/
######
RUN pkg-resolver/set-vault-as-baseurl-centos.sh centos:8

######
# Install packages from yum
######
# python3 is installed in its own step before the package list is generated by
# pkg-resolver/resolve.py (presumably because that script needs a Python 3
# interpreter to run).
# hadolint ignore=DL3008,SC2046
RUN yum update -y \
    && yum install -y python3 \
    && yum install -y $(pkg-resolver/resolve.py centos:8)

####
# Install EPEL
####
RUN pkg-resolver/install-epel.sh centos:8

RUN dnf --enablerepo=powertools install -y \
    doxygen \
    snappy-devel \
    yasm

RUN dnf install -y \
    bouncycastle \
    gcc-toolset-9-gcc \
    gcc-toolset-9-gcc-c++ \
    libpmem-devel

# Set GCC 9 as the default C/C++ compiler
RUN echo "source /opt/rh/gcc-toolset-9/enable" >> /etc/bashrc
# NOTE(review): a login shell is used from here on, presumably so that the
# enable script appended to /etc/bashrc above is sourced by later RUN steps -
# confirm against the base image's shell start-up files.
SHELL ["/bin/bash", "--login", "-c"]

######
# Set the environment variables needed for CMake
# to find and use GCC 9 for compilation
######
# ENV uses the key=value form throughout; the space-separated legacy form is
# discouraged by the Dockerfile reference.
ENV GCC_HOME="/opt/rh/gcc-toolset-9"
ENV CC="${GCC_HOME}/root/usr/bin/gcc"
ENV CXX="${GCC_HOME}/root/usr/bin/g++"
ENV MODULES_RUN_QUARANTINE="LD_LIBRARY_PATH LD_PRELOAD"
ENV MODULES_CMD="/usr/share/Modules/libexec/modulecmd.tcl"
ENV SHLVL=1
ENV MODULEPATH="/etc/scl/modulefiles:/usr/share/Modules/modulefiles:/etc/modulefiles:/usr/share/modulefiles"
ENV MODULEPATH_modshare="/usr/share/modulefiles:1:/usr/share/Modules/modulefiles:1:/etc/modulefiles:1"
ENV MODULESHOME="/usr/share/Modules"
ENV LD_LIBRARY_PATH="${GCC_HOME}/root/usr/lib64:${GCC_HOME}/root/usr/lib:${GCC_HOME}/root/usr/lib64/dyninst:${GCC_HOME}/root/usr/lib/dyninst:${GCC_HOME}/root/usr/lib64:${GCC_HOME}/root/usr/lib:/usr/lib:/usr/lib64"
ENV PCP_DIR="${GCC_HOME}/root"
ENV MANPATH="${GCC_HOME}/root/usr/share/man::"
ENV PATH="${GCC_HOME}/root/usr/bin:/usr/share/Modules/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin"
ENV PKG_CONFIG_PATH="${GCC_HOME}/root/usr/lib64/pkgconfig"
ENV INFOPATH="${GCC_HOME}/root/usr/share/info"

# TODO: Set locale

######
# Set env vars required to build Hadoop
######
ENV MAVEN_HOME=/opt/maven
ENV PATH="${PATH}:${MAVEN_HOME}/bin"
# JAVA_HOME must be set in Maven >= 3.5.0 (MNG-6003)
ENV JAVA_HOME=/usr/lib/jvm/java-1.8.0

#######
# Set env vars for SpotBugs
#######
ENV SPOTBUGS_HOME=/opt/spotbugs

#######
# Set env vars for Google Protobuf
#######
ENV PROTOBUF_HOME=/opt/protobuf
ENV PATH="${PATH}:/opt/protobuf/bin"

######
# Install packages
######
# Each installer takes the platform identifier as its first argument.
RUN pkg-resolver/install-maven.sh centos:8
RUN pkg-resolver/install-cmake.sh centos:8
RUN pkg-resolver/install-boost.sh centos:8
RUN pkg-resolver/install-spotbugs.sh centos:8
RUN pkg-resolver/install-protobuf.sh centos:8
RUN pkg-resolver/install-zstandard.sh centos:8
RUN pkg-resolver/install-common-pkgs.sh

View File

@ -0,0 +1,101 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Dockerfile for installing the necessary dependencies for building Hadoop.
# See BUILDING.txt.
FROM debian:10

WORKDIR /root

# Make a RUN step fail when any command of a pipeline fails, not only the last one.
SHELL ["/bin/bash", "-o", "pipefail", "-c"]

#####
# Disable suggests/recommends
#####
RUN echo APT::Install-Recommends "0"\; > /etc/apt/apt.conf.d/10disableextras
RUN echo APT::Install-Suggests "0"\; >> /etc/apt/apt.conf.d/10disableextras

# ENV uses the key=value form throughout; the space-separated legacy form is
# discouraged by the Dockerfile reference.
ENV DEBIAN_FRONTEND=noninteractive
ENV DEBCONF_TERSE=true

######
# Platform package dependency resolver
######
COPY pkg-resolver pkg-resolver
RUN chmod a+x pkg-resolver/install-pkg-resolver.sh
RUN pkg-resolver/install-pkg-resolver.sh debian:10

######
# Install packages from apt
######
# Two passes: first the packages resolved for debian:10 itself, then - after
# adding the bullseye repository - the packages resolved with --release=bullseye,
# installed with "-t bullseye".
# hadolint ignore=DL3008,SC2046
RUN apt-get -q update \
    && apt-get -q install -y --no-install-recommends $(pkg-resolver/resolve.py debian:10) \
    && echo 'deb http://deb.debian.org/debian bullseye main' >> /etc/apt/sources.list \
    && apt-get -q update \
    && apt-get -q install -y --no-install-recommends -t bullseye $(pkg-resolver/resolve.py --release=bullseye debian:10) \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

# TODO : Set locale

######
# Set env vars required to build Hadoop
######
ENV MAVEN_HOME=/usr
# JAVA_HOME must be set in Maven >= 3.5.0 (MNG-6003)
ENV JAVA_HOME=/usr/lib/jvm/java-11-openjdk-amd64

#######
# Set env vars for SpotBugs 4.2.2
#######
ENV SPOTBUGS_HOME=/opt/spotbugs

#######
# Set env vars for Google Protobuf 3.7.1
#######
ENV PROTOBUF_HOME=/opt/protobuf
ENV PATH="${PATH}:/opt/protobuf/bin"

###
# Avoid out of memory errors in builds
###
# Quoted because the value contains a space.
ENV MAVEN_OPTS="-Xms256m -Xmx3072m"

# Skip gpg verification when downloading Yetus via yetus-wrapper
ENV HADOOP_SKIP_YETUS_VERIFICATION=true

####
# Install packages
####
# Each installer takes the platform identifier as its first argument.
RUN pkg-resolver/install-spotbugs.sh debian:10
RUN pkg-resolver/install-boost.sh debian:10
RUN pkg-resolver/install-protobuf.sh debian:10
RUN pkg-resolver/install-hadolint.sh debian:10
RUN pkg-resolver/install-intel-isa-l.sh debian:10

###
# Everything past this point is either not needed for testing or breaks Yetus.
# So tell Yetus not to read the rest of the file:
# YETUS CUT HERE
###

# Add a welcome message and environment checks.
COPY hadoop_env_checks.sh /root/hadoop_env_checks.sh
RUN chmod 755 /root/hadoop_env_checks.sh
# The single quotes keep ${HOME} unexpanded, so it is written literally into .bashrc.
# hadolint ignore=SC2016
RUN echo '${HOME}/hadoop_env_checks.sh' >> /root/.bashrc

View File

@ -0,0 +1,114 @@
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
# Docker images for building Hadoop
This folder contains the Dockerfiles for building Hadoop on various platforms.
# Dependency management
The mode of installation of the dependencies needed for building Hadoop varies from one platform to
the other. Different platforms have different toolchains. Some packages tend to be polymorphic
across platforms and most commonly, a package that's readily available in one platform's toolchain
isn't available on another. We thus resort to building and installing the package from source,
causing duplication of code since this needs to be done for all the Dockerfiles pertaining to all
the platforms. We need a system to track a dependency - for a package - for a platform
- (and optionally) for a release. Thus, there's a lot of diversity that needs to be handled for
managing package dependencies and
`pkg-resolver` caters to that.
## Supported platforms
`pkg-resolver/platforms.json` contains a list of the supported platforms for dependency management.
## Package dependencies
`pkg-resolver/packages.json` maps a dependency to a given platform. Here's the schema of this JSON.
```json
{
"dependency_1": {
"platform_1": "package_1",
"platform_2": [
"package_1",
"package_2"
]
},
"dependency_2": {
"platform_1": [
"package_1",
"package_2",
"package_3"
]
},
"dependency_3": {
"platform_1": {
"release_1": "package_1_1_1",
"release_2": [
"package_1_2_1",
"package_1_2_2"
]
},
"platform_2": [
"package_2_1",
{
"release_1": "package_2_1_1"
}
]
}
}
```
The root JSON element contains unique _dependency_ children. Each of these in turn contains the names of
the _platforms_ and the list of _packages_ to be installed for that platform. Just to give an example of
how to interpret the above JSON -
1. For `dependency_1`, `package_1` needs to be installed for `platform_1`.
2. For `dependency_1`, `package_1` and `package_2` need to be installed for `platform_2`.
3. For `dependency_2`, `package_1`, `package_2` and `package_3` need to be installed for
`platform_1`.
4. For `dependency_3`, `package_1_1_1` gets installed only if `release_1` has been specified
for `platform_1`.
5. For `dependency_3`, the packages `package_1_2_1` and `package_1_2_2` get installed only
if `release_2` has been specified for `platform_1`.
6. For `dependency_3`, for `platform_2`, `package_2_1` is always installed, but `package_2_1_1` gets
installed only if `release_1` has been specified.
### Tool help
```shell
$ pkg-resolver/resolve.py -h
usage: resolve.py [-h] [-r RELEASE] platform
Platform package dependency resolver for building Apache Hadoop
positional arguments:
platform The name of the platform to resolve the dependencies for
optional arguments:
-h, --help show this help message and exit
-r RELEASE, --release RELEASE
The release label to filter the packages for the given platform
```
## Standalone packages
Most commonly, some packages are not available across the toolchains in various platforms. Thus, we
would need to build and install them. Since we need to do this across all the Dockerfiles for all
the platforms, it could lead to code duplication and managing them becomes a hassle. Thus, we put
the build steps in a `pkg-resolver/install-<package>.sh` and invoke this in all the Dockerfiles.

View File

@ -0,0 +1,50 @@
#!/usr/bin/env python3
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Checks whether the given platform is supported for building Apache Hadoop
"""
import json
import sys
def get_platforms():
    """
    Load the platforms that pkg-resolver knows about.

    The file is opened as 'pkg-resolver/platforms.json', i.e. relative to the
    current working directory of the calling process.

    :return: The parsed JSON content of pkg-resolver/platforms.json.
    """
    with open('pkg-resolver/platforms.json', mode='r', encoding='utf-8') as handle:
        platforms = json.load(handle)
    return platforms
def is_supported_platform(platform):
    """
    Tell whether a platform is one of those returned by get_platforms().

    :param platform: The name of the platform
    :return: True if the platform is supported, False otherwise
    """
    supported = get_platforms()
    return platform in supported
if __name__ == '__main__':
    # Command-line entry point: exactly one argument (the platform name) is
    # expected. Exit status is 0 for a supported platform and 1 otherwise,
    # including when the argument count is wrong.
    arg_count = len(sys.argv) - 1
    if arg_count != 1:
        print('ERROR: Expecting 1 argument, {} were provided'.format(arg_count),
              file=sys.stderr)
        sys.exit(1)

    exit_code = 0 if is_supported_platform(sys.argv[1]) else 1
    sys.exit(exit_code)

View File

@ -0,0 +1,56 @@
#!/usr/bin/env bash
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Builds Boost 1.72.0 from source and installs it with prefix /usr/.
#
# Arguments:
#   $1  Platform identifier; must be accepted by pkg-resolver/check_platform.py.
#   $2  Optional version. Only 1.72.0 is known; any other value produces a
#       warning and the default version is installed instead.

if [ $# -lt 1 ]; then
  echo "ERROR: Need at least 1 argument, $# were provided"
  exit 1
fi

# Test the checker's status directly: any non-zero status (not only 1) means
# the platform could not be confirmed, e.g. when the checker fails to start.
if ! pkg-resolver/check_platform.py "$1"; then
  echo "ERROR: Unsupported platform $1"
  exit 1
fi

default_version="1.72.0"
version_to_install=$default_version
if [ -n "$2" ]; then
  version_to_install="$2"
fi

if [ "$version_to_install" != "1.72.0" ]; then
  echo "WARN: Don't know how to install version $version_to_install, installing the default version $default_version instead"
  version_to_install=$default_version
fi

if [ "$version_to_install" == "1.72.0" ]; then
  # Download, build without the Python bindings, install, then remove the sources.
  # hadolint ignore=DL3003
  mkdir -p /opt/boost-library &&
    curl -L https://sourceforge.net/projects/boost/files/boost/1.72.0/boost_1_72_0.tar.bz2/download >boost_1_72_0.tar.bz2 &&
    mv boost_1_72_0.tar.bz2 /opt/boost-library &&
    cd /opt/boost-library &&
    tar --bzip2 -xf boost_1_72_0.tar.bz2 &&
    cd /opt/boost-library/boost_1_72_0 &&
    ./bootstrap.sh --prefix=/usr/ &&
    ./b2 --without-python install &&
    cd /root &&
    rm -rf /opt/boost-library
else
  # Guard branch: the fallback above already forces the default version.
  echo "ERROR: Don't know how to install version $version_to_install"
  exit 1
fi

View File

@ -0,0 +1,53 @@
#!/usr/bin/env bash
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Builds CMake 3.19.0 from source (bootstrap, make, make install).
#
# Arguments:
#   $1  Platform identifier; must be accepted by pkg-resolver/check_platform.py.
#   $2  Optional version. Only 3.19.0 is known; any other value produces a
#       warning and the default version is installed instead.

if [ $# -lt 1 ]; then
  echo "ERROR: Need at least 1 argument, $# were provided"
  exit 1
fi

# Test the checker's status directly: any non-zero status (not only 1) means
# the platform could not be confirmed, e.g. when the checker fails to start.
if ! pkg-resolver/check_platform.py "$1"; then
  echo "ERROR: Unsupported platform $1"
  exit 1
fi

default_version="3.19.0"
version_to_install=$default_version
if [ -n "$2" ]; then
  version_to_install="$2"
fi

if [ "$version_to_install" != "3.19.0" ]; then
  echo "WARN: Don't know how to install version $version_to_install, installing the default version $default_version instead"
  version_to_install=$default_version
fi

if [ "$version_to_install" == "3.19.0" ]; then
  # The tarball is downloaded to /tmp/cmake and unpacked/built in /opt/cmake.
  # hadolint ignore=DL3003
  mkdir -p /tmp/cmake /opt/cmake &&
    curl -L -s -S https://cmake.org/files/v3.19/cmake-3.19.0.tar.gz -o /tmp/cmake/cmake-3.19.0.tar.gz &&
    tar xzf /tmp/cmake/cmake-3.19.0.tar.gz --strip-components 1 -C /opt/cmake &&
    cd /opt/cmake || exit && ./bootstrap &&
    make "-j$(nproc)" &&
    make install &&
    cd /root || exit
else
  # Guard branch: the fallback above already forces the default version.
  echo "ERROR: Don't know how to install version $version_to_install"
  exit 1
fi

View File

@ -0,0 +1,22 @@
#!/usr/bin/env bash
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
######
# Install pinned versions of pylint and python-dateutil through pip3
######
pip3 install \
  pylint==2.6.0 \
  python-dateutil==2.8.1

View File

@ -0,0 +1,49 @@
#!/usr/bin/env bash
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Downloads the EPEL 8 release RPM and installs it with rpm.
#
# Arguments:
#   $1  Platform identifier; must be accepted by pkg-resolver/check_platform.py.
#   $2  Optional version. Only 8 is known; any other value produces a warning
#       and the default version is installed instead.

if [ $# -lt 1 ]; then
  echo "ERROR: Need at least 1 argument, $# were provided"
  exit 1
fi

# Test the checker's status directly: any non-zero status (not only 1) means
# the platform could not be confirmed, e.g. when the checker fails to start.
if ! pkg-resolver/check_platform.py "$1"; then
  echo "ERROR: Unsupported platform $1"
  exit 1
fi

default_version="8"
version_to_install=$default_version
if [ -n "$2" ]; then
  version_to_install="$2"
fi

if [ "$version_to_install" != "8" ]; then
  echo "WARN: Don't know how to install version $version_to_install, installing the default version $default_version instead"
  version_to_install=$default_version
fi

if [ "$version_to_install" == "8" ]; then
  mkdir -p /tmp/epel &&
    curl -L -s -S https://download-ib01.fedoraproject.org/pub/epel/epel-release-latest-8.noarch.rpm \
      -o /tmp/epel/epel-release-latest-8.noarch.rpm &&
    rpm -Uvh /tmp/epel/epel-release-latest-8.noarch.rpm
else
  # Guard branch: the fallback above already forces the default version.
  echo "ERROR: Don't know how to install version $version_to_install"
  exit 1
fi

View File

@ -0,0 +1,55 @@
#!/usr/bin/env bash
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Builds git 2.9.5 from source and installs it with prefix /usr/local.
#
# Arguments:
#   $1  Platform identifier; must be accepted by pkg-resolver/check_platform.py.
#   $2  Optional version. Only 2.9.5 is known; any other value produces a
#       warning and the default version is installed instead.

if [ $# -lt 1 ]; then
  echo "ERROR: Need at least 1 argument, $# were provided"
  exit 1
fi

# Test the checker's status directly: any non-zero status (not only 1) means
# the platform could not be confirmed, e.g. when the checker fails to start.
if ! pkg-resolver/check_platform.py "$1"; then
  echo "ERROR: Unsupported platform $1"
  exit 1
fi

default_version="2.9.5"
version_to_install=$default_version
if [ -n "$2" ]; then
  version_to_install="$2"
fi

if [ "$version_to_install" != "2.9.5" ]; then
  echo "WARN: Don't know how to install version $version_to_install, installing the default version $default_version instead"
  version_to_install=$default_version
fi

if [ "$version_to_install" == "2.9.5" ]; then
  # The tarball is downloaded to /tmp/git and unpacked/built in /opt/git.
  # hadolint ignore=DL3003
  mkdir -p /tmp/git /opt/git &&
    curl -L -s -S https://mirrors.edge.kernel.org/pub/software/scm/git/git-2.9.5.tar.gz >/tmp/git/git-2.9.5.tar.gz &&
    tar xzf /tmp/git/git-2.9.5.tar.gz --strip-components 1 -C /opt/git &&
    cd /opt/git || exit &&
    make configure &&
    ./configure --prefix=/usr/local &&
    make "-j$(nproc)" &&
    make install &&
    cd /root || exit
else
  # Guard branch: the fallback above already forces the default version.
  echo "ERROR: Don't know how to install version $version_to_install"
  exit 1
fi

View File

@ -0,0 +1,35 @@
#!/usr/bin/env bash
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Downloads the hadolint v1.11.1 Linux x86_64 binary to /bin/hadolint and
# verifies its SHA-512 digest.
#
# Arguments:
#   $1  Platform identifier; must be accepted by pkg-resolver/check_platform.py.

if [ $# -lt 1 ]; then
  echo "ERROR: Need at least 1 argument, $# were provided"
  exit 1
fi

# Test the checker's status directly: any non-zero status (not only 1) means
# the platform could not be confirmed, e.g. when the checker fails to start.
if ! pkg-resolver/check_platform.py "$1"; then
  echo "ERROR: Unsupported platform $1"
  exit 1
fi

# awk exits with status 1 when the computed digest differs from the expected
# one, which becomes the exit status of this script.
curl -L -s -S \
  https://github.com/hadolint/hadolint/releases/download/v1.11.1/hadolint-Linux-x86_64 \
  -o /bin/hadolint &&
  chmod a+rx /bin/hadolint &&
  shasum -a 512 /bin/hadolint |
  awk '$1!="734e37c1f6619cbbd86b9b249e69c9af8ee1ea87a2b1ff71dccda412e9dac35e63425225a95d71572091a3f0a11e9a04c2fc25d9e91b840530c26af32b9891ca" {exit(1)}'

View File

@ -0,0 +1,58 @@
#!/usr/bin/env bash
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Builds Intel ISA-L 2.29.0 from source (autogen, configure, make, make install).
#
# Arguments:
#   $1  Platform identifier; must be accepted by pkg-resolver/check_platform.py.
#   $2  Optional version. Only 2.29.0 is known; any other value produces a
#       warning and the default version is installed instead.

if [ $# -lt 1 ]; then
  echo "ERROR: Need at least 1 argument, $# were provided"
  exit 1
fi

# Test the checker's status directly: any non-zero status (not only 1) means
# the platform could not be confirmed, e.g. when the checker fails to start.
if ! pkg-resolver/check_platform.py "$1"; then
  echo "ERROR: Unsupported platform $1"
  exit 1
fi

default_version="2.29.0"
version_to_install=$default_version
if [ -n "$2" ]; then
  version_to_install="$2"
fi

if [ "$version_to_install" != "2.29.0" ]; then
  echo "WARN: Don't know how to install version $version_to_install, installing the default version $default_version instead"
  version_to_install=$default_version
fi

if [ "$version_to_install" == "2.29.0" ]; then
  # The source tree in /opt/isa-l-src is removed once the install succeeds.
  # hadolint ignore=DL3003,DL3008
  mkdir -p /opt/isa-l-src &&
    curl -L -s -S \
      https://github.com/intel/isa-l/archive/v2.29.0.tar.gz \
      -o /opt/isa-l.tar.gz &&
    tar xzf /opt/isa-l.tar.gz --strip-components 1 -C /opt/isa-l-src &&
    cd /opt/isa-l-src &&
    ./autogen.sh &&
    ./configure &&
    make "-j$(nproc)" &&
    make install &&
    cd /root &&
    rm -rf /opt/isa-l-src
else
  # Guard branch: the fallback above already forces the default version.
  echo "ERROR: Don't know how to install version $version_to_install"
  exit 1
fi

View File

@ -0,0 +1,49 @@
#!/usr/bin/env bash
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Downloads the Apache Maven 3.6.3 binary tarball and unpacks it into /opt/maven.
#
# Arguments:
#   $1  Platform identifier; must be accepted by pkg-resolver/check_platform.py.
#   $2  Optional version. Only 3.6.3 is known; any other value produces a
#       warning and the default version is installed instead.

if [ $# -lt 1 ]; then
  echo "ERROR: Need at least 1 argument, $# were provided"
  exit 1
fi

# Test the checker's status directly: any non-zero status (not only 1) means
# the platform could not be confirmed, e.g. when the checker fails to start.
if ! pkg-resolver/check_platform.py "$1"; then
  echo "ERROR: Unsupported platform $1"
  exit 1
fi

default_version="3.6.3"
version_to_install=$default_version
if [ -n "$2" ]; then
  version_to_install="$2"
fi

if [ "$version_to_install" != "3.6.3" ]; then
  echo "WARN: Don't know how to install version $version_to_install, installing the default version $default_version instead"
  version_to_install=$default_version
fi

if [ "$version_to_install" == "3.6.3" ]; then
  mkdir -p /opt/maven /tmp/maven &&
    curl -L -s -S https://mirrors.estointernet.in/apache/maven/maven-3/3.6.3/binaries/apache-maven-3.6.3-bin.tar.gz \
      -o /tmp/maven/apache-maven-3.6.3-bin.tar.gz &&
    tar xzf /tmp/maven/apache-maven-3.6.3-bin.tar.gz --strip-components 1 -C /opt/maven
else
  # Guard branch: the fallback above already forces the default version.
  echo "ERROR: Don't know how to install version $version_to_install"
  exit 1
fi

View File

@ -0,0 +1,54 @@
#!/usr/bin/env bash
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Builds Node.js 14.16.1 from source (configure, make, make install).
#
# Arguments:
#   $1  Platform identifier; must be accepted by pkg-resolver/check_platform.py.
#   $2  Optional version. Only 14.16.1 is known; any other value produces a
#       warning and the default version is installed instead.

if [ $# -lt 1 ]; then
  echo "ERROR: Need at least 1 argument, $# were provided"
  exit 1
fi

# Test the checker's status directly: any non-zero status (not only 1) means
# the platform could not be confirmed, e.g. when the checker fails to start.
if ! pkg-resolver/check_platform.py "$1"; then
  echo "ERROR: Unsupported platform $1"
  exit 1
fi

default_version="14.16.1"
version_to_install=$default_version
if [ -n "$2" ]; then
  version_to_install="$2"
fi

if [ "$version_to_install" != "14.16.1" ]; then
  echo "WARN: Don't know how to install version $version_to_install, installing the default version $default_version instead"
  version_to_install=$default_version
fi

if [ "$version_to_install" == "14.16.1" ]; then
  # The tarball is saved directly under /tmp; /tmp/node holds the unpacked sources.
  # hadolint ignore=DL3003
  mkdir -p /tmp/node &&
    curl -L -s -S https://nodejs.org/dist/v14.16.1/node-v14.16.1.tar.gz -o /tmp/node-v14.16.1.tar.gz &&
    tar xzf /tmp/node-v14.16.1.tar.gz --strip-components 1 -C /tmp/node &&
    cd /tmp/node || exit &&
    ./configure &&
    make "-j$(nproc)" &&
    make install &&
    cd /root || exit
else
  # Guard branch: the fallback above already forces the default version.
  echo "ERROR: Don't know how to install version $version_to_install"
  exit 1
fi

View File

@ -0,0 +1,39 @@
#!/usr/bin/env bash
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Prepares the pkg-resolver tooling: fixes file permissions and installs the
# Python 3 packages and pip modules listed below.
#
# Arguments:
#   $1  Platform identifier. Only "debian:10" is handled; anything else is
#       rejected with an error.

if [ $# -lt 1 ]; then
  echo "ERROR: No platform specified, please specify one"
  exit 1
fi

chmod a+x pkg-resolver/*.sh pkg-resolver/*.py
chmod a+r pkg-resolver/*.json

if [ "$1" == "debian:10" ]; then
  # The steps are chained so that a failing step stops the sequence and its
  # status becomes the script's exit status, instead of being ignored.
  apt-get -q update &&
    apt-get -q install -y --no-install-recommends python3 \
      python3-pip \
      python3-pkg-resources \
      python3-setuptools \
      python3-wheel &&
    pip3 install pylint==2.6.0 python-dateutil==2.8.1
else
  # Need to add the code for the rest of the platforms - HADOOP-17920
  echo "ERROR: The given platform $1 is not yet supported or is invalid"
  exit 1
fi

View File

@ -0,0 +1,57 @@
#!/usr/bin/env bash
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Builds Protocol Buffers 3.7.1 from source and installs it with prefix /opt/protobuf.
#
# Arguments:
#   $1  Platform identifier; must be accepted by pkg-resolver/check_platform.py.
#   $2  Optional version. Only 3.7.1 is known; any other value produces a
#       warning and the default version is installed instead.

if [ $# -lt 1 ]; then
  echo "ERROR: Need at least 1 argument, $# were provided"
  exit 1
fi

# Test the checker's status directly: any non-zero status (not only 1) means
# the platform could not be confirmed, e.g. when the checker fails to start.
if ! pkg-resolver/check_platform.py "$1"; then
  echo "ERROR: Unsupported platform $1"
  exit 1
fi

default_version="3.7.1"
version_to_install=$default_version
if [ -n "$2" ]; then
  version_to_install="$2"
fi

if [ "$version_to_install" != "3.7.1" ]; then
  echo "WARN: Don't know how to install version $version_to_install, installing the default version $default_version instead"
  version_to_install=$default_version
fi

if [ "$version_to_install" == "3.7.1" ]; then
  # The source tree in /opt/protobuf-src is removed once the install succeeds.
  # hadolint ignore=DL3003
  mkdir -p /opt/protobuf-src &&
    curl -L -s -S \
      https://github.com/protocolbuffers/protobuf/releases/download/v3.7.1/protobuf-java-3.7.1.tar.gz \
      -o /opt/protobuf.tar.gz &&
    tar xzf /opt/protobuf.tar.gz --strip-components 1 -C /opt/protobuf-src &&
    cd /opt/protobuf-src &&
    ./configure --prefix=/opt/protobuf &&
    make "-j$(nproc)" &&
    make install &&
    cd /root &&
    rm -rf /opt/protobuf-src
else
  # Guard branch: the fallback above already forces the default version.
  echo "ERROR: Don't know how to install version $version_to_install"
  exit 1
fi

View File

@ -0,0 +1,50 @@
#!/usr/bin/env bash
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Downloads the SpotBugs 4.2.2 release tarball and unpacks it into /opt/spotbugs.
#
# Arguments:
#   $1  Platform identifier; must be accepted by pkg-resolver/check_platform.py.
#   $2  Optional version. Only 4.2.2 is known; any other value produces a
#       warning and the default version is installed instead.

if [ $# -lt 1 ]; then
  echo "ERROR: Need at least 1 argument, $# were provided"
  exit 1
fi

# Test the checker's status directly: any non-zero status (not only 1) means
# the platform could not be confirmed, e.g. when the checker fails to start.
if ! pkg-resolver/check_platform.py "$1"; then
  echo "ERROR: Unsupported platform $1"
  exit 1
fi

default_version="4.2.2"
version_to_install=$default_version
if [ -n "$2" ]; then
  version_to_install="$2"
fi

if [ "$version_to_install" != "4.2.2" ]; then
  echo "WARN: Don't know how to install version $version_to_install, installing the default version $default_version instead"
  version_to_install=$default_version
fi

if [ "$version_to_install" == "4.2.2" ]; then
  mkdir -p /opt/spotbugs &&
    curl -L -s -S https://github.com/spotbugs/spotbugs/releases/download/4.2.2/spotbugs-4.2.2.tgz \
      -o /opt/spotbugs.tgz &&
    tar xzf /opt/spotbugs.tgz --strip-components 1 -C /opt/spotbugs &&
    chmod +x /opt/spotbugs/bin/*
else
  # Guard branch: the fallback above already forces the default version.
  echo "ERROR: Don't know how to install version $version_to_install"
  exit 1
fi

View File

@ -0,0 +1,49 @@
#!/usr/bin/env bash
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Installs yasm from the EPEL 7 x86_64 rpm.
#   $1 - platform identifier, checked with pkg-resolver/check_platform.py
#   $2 - optional version to install; only 1.2.0-4 is known, any other value
#        falls back to the default version with a warning

if [ $# -lt 1 ]; then
  echo "ERROR: Need at least 1 argument, $# were provided"
  exit 1
fi

# Any non-zero status aborts the installation, not only status 1: a checker
# that is missing or not executable (126/127) must not let the install proceed.
if ! pkg-resolver/check_platform.py "$1"; then
  echo "ERROR: Unsupported platform $1"
  exit 1
fi

default_version="1.2.0-4"
version_to_install=$default_version
if [ -n "$2" ]; then
  version_to_install="$2"
fi

if [ "$version_to_install" != "1.2.0-4" ]; then
  echo "WARN: Don't know how to install version $version_to_install, installing the default version $default_version instead"
  version_to_install=$default_version
fi

if [ "$version_to_install" == "1.2.0-4" ]; then
  # NOTE(review): /tmp/yasm is created but the rpm is saved beside it, not inside it.
  # -f makes curl fail on an HTTP error instead of saving the error page as the rpm.
  mkdir -p /tmp/yasm &&
    curl -f -L -s -S https://download-ib01.fedoraproject.org/pub/epel/7/x86_64/Packages/y/yasm-1.2.0-4.el7.x86_64.rpm \
      -o /tmp/yasm-1.2.0-4.el7.x86_64.rpm &&
    rpm -Uvh /tmp/yasm-1.2.0-4.el7.x86_64.rpm
else
  # Defensive guard: not reachable while the fallback above forces the only known version.
  echo "ERROR: Don't know how to install version $version_to_install"
  exit 1
fi

View File

@ -0,0 +1,53 @@
#!/usr/bin/env bash
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Builds zstd from the GitHub source tarball in /opt/zstd and runs `make install`.
#   $1 - platform identifier, checked with pkg-resolver/check_platform.py
#   $2 - optional version to install; only 1.4.9 is known, any other value
#        falls back to the default version with a warning

if [ $# -lt 1 ]; then
  echo "ERROR: Need at least 1 argument, $# were provided"
  exit 1
fi

# Any non-zero status aborts the installation, not only status 1: a checker
# that is missing or not executable (126/127) must not let the install proceed.
if ! pkg-resolver/check_platform.py "$1"; then
  echo "ERROR: Unsupported platform $1"
  exit 1
fi

default_version="1.4.9"
version_to_install=$default_version
if [ -n "$2" ]; then
  version_to_install="$2"
fi

if [ "$version_to_install" != "1.4.9" ]; then
  echo "WARN: Don't know how to install version $version_to_install, installing the default version $default_version instead"
  version_to_install=$default_version
fi

if [ "$version_to_install" == "1.4.9" ]; then
  # hadolint ignore=DL3003
  # -f makes curl fail on an HTTP error instead of saving the error page as the tarball.
  mkdir -p /opt/zstd /tmp/zstd &&
    curl -f -L -s -S https://github.com/facebook/zstd/archive/refs/tags/v1.4.9.tar.gz -o /tmp/zstd/v1.4.9.tar.gz &&
    tar xzf /tmp/zstd/v1.4.9.tar.gz --strip-components 1 -C /opt/zstd &&
    cd /opt/zstd || exit &&
    make "-j$(nproc)" &&
    make install &&
    cd /root || exit
else
  # Defensive guard: not reachable while the fallback above forces the only known version.
  echo "ERROR: Don't know how to install version $version_to_install"
  exit 1
fi

View File

@ -0,0 +1,361 @@
{
"ant": {
"debian:10": "ant",
"ubuntu:focal": "ant",
"ubuntu:focal::arch64": "ant",
"centos:7": "ant",
"centos:8": "ant"
},
"apt-utils": {
"debian:10": "apt-utils",
"ubuntu:focal": "apt-utils",
"ubuntu:focal::arch64": "apt-utils"
},
"automake": {
"debian:10": "automake",
"ubuntu:focal": "automake",
"ubuntu:focal::arch64": "automake",
"centos:7": "automake",
"centos:8": "automake"
},
"autoconf": {
"centos:7": "autoconf"
},
"bats": {
"debian:10": "bats",
"ubuntu:focal": "bats",
"ubuntu:focal::arch64": "bats"
},
"build-essential": {
"debian:10": "build-essential",
"ubuntu:focal": "build-essential",
"ubuntu:focal::arch64": "build-essential",
"centos:7": "build-essential"
},
"bzip2": {
"debian:10": [
"bzip2",
"libbz2-dev"
],
"ubuntu:focal": [
"bzip2",
"libbz2-dev"
],
"ubuntu:focal::arch64": [
"bzip2",
"libbz2-dev"
],
"centos:7": [
"bzip2",
"bzip2-devel"
],
"centos:8": [
"bzip2",
"bzip2-devel"
]
},
"clang": {
"debian:10": "clang",
"ubuntu:focal": "clang",
"ubuntu:focal::arch64": "clang",
"centos:7": "clang",
"centos:8": "clang"
},
"cmake": {
"debian:10": "cmake",
"ubuntu:focal": "cmake",
"ubuntu:focal::arch64": "cmake"
},
"curl": {
"debian:10": [
"curl",
"libcurl4-openssl-dev"
],
"ubuntu:focal": [
"curl",
"libcurl4-openssl-dev"
],
"ubuntu:focal::arch64": [
"curl",
"libcurl4-openssl-dev"
],
"centos:7": [
"curl",
"libcurl-devel"
],
"centos:8": [
"curl",
"libcurl-devel"
]
},
"doxygen": {
"debian:10": "doxygen",
"ubuntu:focal": "doxygen",
"ubuntu:focal::arch64": "doxygen",
"centos:7": "doxygen"
},
"dnf": {
"centos:8": "dnf"
},
"fuse": {
"debian:10": [
"fuse",
"libfuse-dev"
],
"ubuntu:focal": [
"fuse",
"libfuse-dev"
],
"ubuntu:focal::arch64": [
"fuse",
"libfuse-dev"
],
"centos:7": [
"fuse",
"fuse-libs",
"fuse-devel"
],
"centos:8": [
"fuse",
"fuse-libs",
"fuse-devel"
]
},
"gcc": {
"debian:10": {
"bullseye": [
"gcc",
"g++"
]
},
"ubuntu:focal": [
"gcc",
"g++"
],
"ubuntu:focal::arch64": [
"gcc",
"g++"
],
"centos:7": [
"centos-release-scl",
"devtoolset-9"
]
},
"gettext": {
"centos:7": "gettext-devel"
},
"git": {
"debian:10": "git",
"ubuntu:focal": "git",
"ubuntu:focal::arch64": "git",
"centos:8": "git"
},
"gnupg-agent": {
"debian:10": "gnupg-agent",
"ubuntu:focal": "gnupg-agent",
"ubuntu:focal::arch64": "gnupg-agent"
},
"hugo": {
"debian:10": "hugo",
"ubuntu:focal": "hugo",
"ubuntu:focal::arch64": "hugo"
},
"libbcprov-java": {
"debian:10": "libbcprov-java",
"ubuntu:focal": "libbcprov-java",
"ubuntu:focal::arch64": "libbcprov-java"
},
"libtool": {
"debian:10": "libtool",
"ubuntu:focal": "libtool",
"ubuntu:focal::arch64": "libtool",
"centos:7": "libtool",
"centos:8": "libtool"
},
"openssl": {
"debian:10": "libssl-dev",
"ubuntu:focal": "libssl-dev",
"ubuntu:focal::arch64": "libssl-dev",
"centos:7": "openssl-devel",
"centos:8": "openssl-devel"
},
"perl": {
"centos:7": [
"perl-CPAN",
"perl-devel"
]
},
"protocol-buffers": {
"debian:10": [
"libprotobuf-dev",
"libprotoc-dev"
],
"ubuntu:focal": [
"libprotobuf-dev",
"libprotoc-dev"
],
"ubuntu:focal::arch64": [
"libprotobuf-dev",
"libprotoc-dev"
]
},
"sasl": {
"debian:10": "libsasl2-dev",
"ubuntu:focal": "libsasl2-dev",
"ubuntu:focal::arch64": "libsasl2-dev",
"centos:7": "cyrus-sasl-devel",
"centos:8": "cyrus-sasl-devel"
},
"snappy": {
"debian:10": "libsnappy-dev",
"ubuntu:focal": "libsnappy-dev",
"ubuntu:focal::arch64": "libsnappy-dev",
"centos:7": "snappy-devel"
},
"zlib": {
"debian:10": [
"libzstd-dev",
"zlib1g-dev"
],
"ubuntu:focal": [
"libzstd-dev",
"zlib1g-dev"
],
"ubuntu:focal::arch64": [
"libzstd-dev",
"zlib1g-dev"
],
"centos:7": [
"zlib-devel",
"lz4-devel"
],
"centos:8": [
"zlib-devel",
"lz4-devel"
]
},
"locales": {
"debian:10": "locales",
"ubuntu:focal": "locales",
"ubuntu:focal::arch64": "locales"
},
"libtirpc-devel": {
"centos:7": "libtirpc-devel",
"centos:8": "libtirpc-devel"
},
"libpmem": {
"centos:7": "libpmem-devel"
},
"make": {
"debian:10": "make",
"ubuntu:focal": "make",
"ubuntu:focal::arch64": "make",
"centos:7": "make",
"centos:8": "make"
},
"maven": {
"debian:10": "maven",
"ubuntu:focal": "maven",
"ubuntu:focal::arch64": "maven"
},
"java": {
"debian:10": "openjdk-11-jdk",
"ubuntu:focal": [
"openjdk-8-jdk",
"openjdk-11-jdk"
],
"ubuntu:focal::arch64": [
"openjdk-8-jdk",
"openjdk-11-jdk"
]
},
"pinentry-curses": {
"debian:10": "pinentry-curses",
"ubuntu:focal": "pinentry-curses",
"ubuntu:focal::arch64": "pinentry-curses",
"centos:7": "pinentry-curses",
"centos:8": "pinentry-curses"
},
"pkg-config": {
"debian:10": "pkg-config",
"ubuntu:focal": "pkg-config",
"ubuntu:focal::arch64": "pkg-config",
"centos:8": "pkg-config"
},
"python": {
"debian:10": [
"python3",
"python3-pip",
"python3-pkg-resources",
"python3-setuptools",
"python3-wheel"
],
"ubuntu:focal": [
"python3",
"python3-pip",
"python3-pkg-resources",
"python3-setuptools",
"python3-wheel"
],
"ubuntu:focal::arch64": [
"python2.7",
"python3",
"python3-pip",
"python3-pkg-resources",
"python3-setuptools",
"python3-wheel"
],
"centos:7": [
"python3",
"python3-pip",
"python3-setuptools",
"python3-wheel"
],
"centos:8": [
"python3",
"python3-pip",
"python3-setuptools",
"python3-wheel"
]
},
"rsync": {
"debian:10": "rsync",
"ubuntu:focal": "rsync",
"ubuntu:focal::arch64": "rsync",
"centos:7": "rsync",
"centos:8": "rsync"
},
"shellcheck": {
"debian:10": "shellcheck",
"ubuntu:focal": "shellcheck",
"ubuntu:focal::arch64": "shellcheck"
},
"shasum": {
"centos:7": "perl-Digest-SHA",
"centos:8": "perl-Digest-SHA"
},
"software-properties-common": {
"debian:10": "software-properties-common",
"ubuntu:focal": "software-properties-common",
"ubuntu:focal::arch64": "software-properties-common"
},
"sudo": {
"debian:10": "sudo",
"ubuntu:focal": "sudo",
"ubuntu:focal::arch64": "sudo",
"centos:7": "sudo",
"centos:8": "sudo"
},
"valgrind": {
"debian:10": "valgrind",
"ubuntu:focal": "valgrind",
"ubuntu:focal::arch64": "valgrind",
"centos:7": "valgrind",
"centos:8": "valgrind"
},
"yasm": {
"debian:10": "yasm",
"ubuntu:focal": "yasm",
"ubuntu:focal::arch64": "yasm"
}
}

View File

@ -0,0 +1,7 @@
[
"ubuntu:focal",
"ubuntu:focal::arch64",
"centos:7",
"centos:8",
"debian:10"
]

View File

@ -0,0 +1,98 @@
#!/usr/bin/env python3
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Platform package dependency resolver for building Apache Hadoop.
"""
import argparse
import json
import sys
from check_platform import is_supported_platform
def get_packages(platform, release=None):
    """
    Resolve and get the list of packages to install for the given platform.

    The package definitions are read from pkg-resolver/packages.json, resolved relative to the
    current working directory.

    :param platform: The platform for which the packages needs to be resolved.
    :param release: An optional parameter that filters the packages of the given platform for the
                    specified release.
    :return: A list of resolved packages to install.
    :raises TypeError: If a package entry is neither a list, a dict nor a str.
    """
    with open('pkg-resolver/packages.json', encoding='utf-8', mode='r') as pkg_file:
        pkgs = json.load(pkg_file)
    packages = []

    def process_package(package, in_release=False):
        """
        Processes the given package object that belongs to a platform and adds it to the packages
        list variable in the parent scope.
        In essence, this method recursively traverses the JSON structure defined in packages.json
        and performs the core filtering.

        :param package: The package object to process.
        :param in_release: A boolean that indicates whether the current traversal belongs to a
                           package that needs to be filtered for the given release label.
        """
        if isinstance(package, list):
            for entry in package:
                process_package(entry, in_release)
        elif isinstance(package, dict):
            # A dict maps release labels to packages; it only contributes when a release
            # label was requested.
            if release is None:
                return
            # Recurse on the release value itself rather than iterating over it, so that a
            # release mapped to a single package name (a str) is added as one package
            # instead of being split into its characters.
            process_package(package.get(release, []), in_release=True)
        elif isinstance(package, str):
            # Filter out the package that doesn't belong to this release,
            # if a release label has been specified.
            if release is not None and not in_release:
                return
            packages.append(package)
        else:
            raise TypeError('Unknown package of type: {}'.format(type(package)))

    # Only the package groups that define an entry for this platform are considered.
    for platforms in filter(lambda x: x.get(platform) is not None, pkgs.values()):
        process_package(platforms.get(platform))
    return packages
if __name__ == '__main__':
    # Command-line entry point: prints the space-separated packages for the given platform.
    if len(sys.argv) < 2:
        print('ERROR: Need at least 1 argument, {} were provided'.format(len(sys.argv) - 1),
              file=sys.stderr)
        sys.exit(1)

    arg_parser = argparse.ArgumentParser(
        description='Platform package dependency resolver for building Apache Hadoop')
    arg_parser.add_argument('-r', '--release', nargs=1, type=str,
                            help='The release label to filter the packages for the given platform')
    arg_parser.add_argument('platform', nargs=1, type=str,
                            help='The name of the platform to resolve the dependencies for')
    args = arg_parser.parse_args()

    # nargs=1 makes argparse store one-element lists, hence the [0] lookups below.
    if not is_supported_platform(args.platform[0]):
        # Report the platform name itself, not the one-element list that holds it.
        print(
            'ERROR: The given platform {} is not supported. '
            'Please refer to platforms.json for a list of supported platforms'.format(
                args.platform[0]), file=sys.stderr)
        sys.exit(1)

    packages_to_install = get_packages(args.platform[0],
                                       args.release[0] if args.release is not None else None)
    print(' '.join(packages_to_install))

View File

@ -0,0 +1,33 @@
#!/usr/bin/env bash
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Points the CentOS yum repositories at vault.centos.org and runs `yum update`.
#   $1 - platform identifier; only centos:7 and centos:8 are accepted

if [ $# -lt 1 ]; then
  echo "ERROR: Need at least 1 argument, $# were provided"
  exit 1
fi

case "$1" in
  "centos:7" | "centos:8")
    # Comment out every mirrorlist entry and turn the commented mirror.centos.org
    # baseurl into an active vault.centos.org baseurl before updating.
    cd /etc/yum.repos.d/ || exit &&
      sed -i 's/mirrorlist/#mirrorlist/g' /etc/yum.repos.d/CentOS-* &&
      sed -i 's|#baseurl=http://mirror.centos.org|baseurl=http://vault.centos.org|g' /etc/yum.repos.d/CentOS-* &&
      yum update -y &&
      cd /root || exit
    ;;
  *)
    echo "ERROR: Setting the archived baseurl is only supported for centos 7 and 8 environments"
    exit 1
    ;;
esac

View File

@ -0,0 +1,134 @@
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
Apache Hadoop Git/Jira FixVersion validation
============================================================
Git commits in Apache Hadoop contain a Jira number of the format
HADOOP-XXXX or HDFS-XXXX or YARN-XXXX or MAPREDUCE-XXXX.
While creating a release candidate, we also include a changelist,
and this changelist can be identified based on Fixed/Closed Jiras
with the correct fix versions. However, sometimes we face a few
inconsistencies between the fixed Jira and the Git commit message.
git_jira_fix_version_check.py script takes care of
identifying all git commits with commit
messages with any of these issues:
1. commit is reverted as per commit message
2. commit does not contain Jira number format in message
3. Jira does not have expected fixVersion
4. Jira has expected fixVersion, but it is not yet resolved
Moreover, this script also finds any resolved Jira with expected
fixVersion but without any corresponding commit present.
This should be useful as part of RC preparation.
git_jira_fix_version_check.py supports Python 3 and requires the
`jira` package to be installed:
```
$ python3 --version
Python 3.9.7
$ python3 -m venv ./venv
$ ./venv/bin/pip install -r dev-support/git-jira-validation/requirements.txt
$ ./venv/bin/python dev-support/git-jira-validation/git_jira_fix_version_check.py
```
The script also requires below inputs:
```
1. First commit hash to start excluding commits from history:
Usually we can provide the latest commit hash from the last tagged release
so that the script only loops through the commits that are newer than this
commit hash (the git log is walked from HEAD until this hash is reached).
e.g. for the 3.3.2 release, we can provide
git hash: fa4915fdbbbec434ab41786cb17b82938a613f16
because this commit bumps up hadoop pom versions to 3.3.2:
https://github.com/apache/hadoop/commit/fa4915fdbbbec434ab41786cb17b82938a613f16
2. Fix Version:
Exact fixVersion that we would like to compare all Jira's fixVersions
with. e.g for 3.3.2 release, it should be 3.3.2.
3. JIRA Project Name:
The exact name of Project as case-sensitive e.g HADOOP / OZONE
4. Path of project's working dir with release branch checked-in:
Path of the project whose git hashes we want to compare. The local fork
of the project should be up to date with upstream and the expected release
branch should be checked out.
5. Jira server url (default url: https://issues.apache.org/jira):
Default value of server points to ASF Jiras but this script can be
used outside of ASF Jira too.
```
Example of script execution:
```
JIRA Project Name (e.g HADOOP / OZONE etc): HADOOP
First commit hash to start excluding commits from history: fa4915fdbbbec434ab41786cb17b82938a613f16
Fix Version: 3.3.2
Jira server url (default: https://issues.apache.org/jira):
Path of project's working dir with release branch checked-in: /Users/vjasani/Documents/src/hadoop-3.3/hadoop
Check git status output and verify expected branch
On branch branch-3.3.2
Your branch is up to date with 'origin/branch-3.3.2'.
nothing to commit, working tree clean
Jira/Git commit message diff starting: ##############################################
Jira not present with version: 3.3.2. Commit: 8cd8e435fb43a251467ca74fadcb14f21a3e8163 HADOOP-17198. Support S3 Access Points (#3260) (branch-3.3.2) (#3955)
WARN: Jira not found. Commit: 8af28b7cca5c6020de94e739e5373afc69f399e5 Updated the index as per 3.3.2 release
WARN: Jira not found. Commit: e42e483d0085aa46543ebcb1196dd155ddb447d0 Make upstream aware of 3.3.1 release
Commit seems reverted. Commit: 6db1165380cd308fb74c9d17a35c1e57174d1e09 Revert "HDFS-14099. Unknown frame descriptor when decompressing multiple frames (#3836)"
Commit seems reverted. Commit: 1e3f94fa3c3d4a951d4f7438bc13e6f008f228f4 Revert "HDFS-16333. fix balancer bug when transfer an EC block (#3679)"
Jira not present with version: 3.3.2. Commit: ce0bc7b473a62a580c1227a4de6b10b64b045d3a HDFS-16344. Improve DirectoryScanner.Stats#toString (#3695)
Jira not present with version: 3.3.2. Commit: 30f0629d6e6f735c9f4808022f1a1827c5531f75 HDFS-16339. Show the threshold when mover threads quota is exceeded (#3689)
Jira not present with version: 3.3.2. Commit: e449daccf486219e3050254d667b74f92e8fc476 YARN-11007. Correct words in YARN documents (#3680)
Commit seems reverted. Commit: 5c189797828e60a3329fd920ecfb99bcbccfd82d Revert "HDFS-16336. Addendum: De-flake TestRollingUpgrade#testRollback (#3686)"
Jira not present with version: 3.3.2. Commit: 544dffd179ed756bc163e4899e899a05b93d9234 HDFS-16171. De-flake testDecommissionStatus (#3280)
Jira not present with version: 3.3.2. Commit: c6914b1cb6e4cab8263cd3ae5cc00bc7a8de25de HDFS-16350. Datanode start time should be set after RPC server starts successfully (#3711)
Jira not present with version: 3.3.2. Commit: 328d3b84dfda9399021ccd1e3b7afd707e98912d HDFS-16336. Addendum: De-flake TestRollingUpgrade#testRollback (#3686)
Jira not present with version: 3.3.2. Commit: 3ae8d4ccb911c9ababd871824a2fafbb0272c016 HDFS-16336. De-flake TestRollingUpgrade#testRollback (#3686)
Jira not present with version: 3.3.2. Commit: 15d3448e25c797b7d0d401afdec54683055d4bb5 HADOOP-17975. Fallback to simple auth does not work for a secondary DistributedFileSystem instance. (#3579)
Jira not present with version: 3.3.2. Commit: dd50261219de71eaa0a1ad28529953e12dfb92e0 YARN-10991. Fix to ignore the grouping "[]" for resourcesStr in parseResourcesString method (#3592)
Jira not present with version: 3.3.2. Commit: ef462b21bf03b10361d2f9ea7b47d0f7360e517f HDFS-16332. Handle invalid token exception in sasl handshake (#3677)
WARN: Jira not found. Commit: b55edde7071419410ea5bea4ce6462b980e48f5b Also update hadoop.version to 3.3.2
...
...
...
Found first commit hash after which git history is redundant. commit: fa4915fdbbbec434ab41786cb17b82938a613f16
Exiting successfully
Jira/Git commit message diff completed: ##############################################
Any resolved Jira with fixVersion 3.3.2 but corresponding commit not present
Starting diff: ##############################################
HADOOP-18066 is marked resolved with fixVersion 3.3.2 but no corresponding commit found
HADOOP-17936 is marked resolved with fixVersion 3.3.2 but no corresponding commit found
Completed diff: ##############################################
```

View File

@ -0,0 +1,118 @@
#!/usr/bin/env python3
############################################################################
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
############################################################################
"""An application to assist Release Managers with ensuring that histories in
Git and fixVersions in JIRA are in agreement. See README.md for a detailed
explanation.
"""
import os
import re
import subprocess
from jira import JIRA
# Interactive script body: asks for the release parameters, walks `git log` of the given
# working directory and compares every commit message against the fixVersion recorded in Jira.
jira_project_name = input("JIRA Project Name (e.g HADOOP / OZONE etc): ") \
                    or "HADOOP"
# Define project_jira_keys with - appended. e.g for HADOOP Jiras,
# project_jira_keys should include HADOOP-, HDFS-, YARN-, MAPREDUCE-
project_jira_keys = [jira_project_name + '-']
if jira_project_name == 'HADOOP':
    project_jira_keys.append('HDFS-')
    project_jira_keys.append('YARN-')
    project_jira_keys.append('MAPREDUCE-')

first_exclude_commit_hash = input("First commit hash to start excluding commits from history: ")
fix_version = input("Fix Version: ")

jira_server_url = input(
    "Jira server url (default: https://issues.apache.org/jira): ") \
        or "https://issues.apache.org/jira"

jira = JIRA(server=jira_server_url)

local_project_dir = input("Path of project's working dir with release branch checked-in: ")
os.chdir(local_project_dir)

GIT_STATUS_MSG = subprocess.check_output(['git', 'status']).decode("utf-8")
print('\nCheck git status output and verify expected branch\n')
print(GIT_STATUS_MSG)

print('\nJira/Git commit message diff starting: ##############################################')

# Keys of the Jiras that are resolved with the expected fixVersion and have a commit.
issue_set_from_commit_msg = set()

# `git log --pretty=oneline` prints "<hash> <subject>" per commit, newest first.
for commit in subprocess.check_output(['git', 'log', '--pretty=oneline']).decode(
        "utf-8").splitlines():
    if commit.startswith(first_exclude_commit_hash):
        print("Found first commit hash after which git history is redundant. commit: "
              + first_exclude_commit_hash)
        print("Exiting successfully")
        break
    if re.search('revert', commit, re.IGNORECASE):
        print("Commit seems reverted. \t\t\t Commit: " + commit)
        continue
    # The first configured key prefix that occurs in the commit line wins.
    ACTUAL_PROJECT_JIRA = None
    for project_jira in project_jira_keys:
        if project_jira in commit:
            ACTUAL_PROJECT_JIRA = project_jira
            break
    if not ACTUAL_PROJECT_JIRA:
        print("WARN: Jira not found. \t\t\t Commit: " + commit)
        continue
    # Collect the run of digits that directly follows the key prefix.
    JIRA_NUM = ''
    for c in commit.split(ACTUAL_PROJECT_JIRA)[1]:
        if c.isdigit():
            JIRA_NUM = JIRA_NUM + c
        else:
            break
    if not JIRA_NUM:
        # The prefix is not followed by a number (e.g. "HADOOP-XXXX"), so there is no
        # issue key to look up; report it instead of querying Jira with a bare prefix.
        print("WARN: Jira not found. \t\t\t Commit: " + commit)
        continue
    issue = jira.issue(ACTUAL_PROJECT_JIRA + JIRA_NUM)
    EXPECTED_FIX_VERSION = False
    for version in issue.fields.fixVersions:
        if version.name == fix_version:
            EXPECTED_FIX_VERSION = True
            break
    if not EXPECTED_FIX_VERSION:
        print("Jira not present with version: " + fix_version + ". \t Commit: " + commit)
        continue
    if issue.fields.status is None or issue.fields.status.name not in ('Resolved', 'Closed'):
        print("Jira is not resolved yet? \t\t Commit: " + commit)
    else:
        # This means Jira corresponding to current commit message is resolved with expected
        # fixVersion.
        # This is no-op by default, if needed, convert to print statement.
        issue_set_from_commit_msg.add(ACTUAL_PROJECT_JIRA + JIRA_NUM)

print('Jira/Git commit message diff completed: ##############################################')

print('\nAny resolved Jira with fixVersion ' + fix_version
      + ' but corresponding commit not present')
print('Starting diff: ##############################################')
# maxResults=False makes the client fetch every matching issue in batches; the default
# page size of 50 would silently truncate the comparison for larger releases.
# NOTE(review): the query covers only jira_project_name, although commits are also matched
# against the HDFS-/YARN-/MAPREDUCE- keys when the project is HADOOP - confirm this is intended.
all_issues_with_fix_version = jira.search_issues(
    'project=' + jira_project_name + ' and status in (Resolved,Closed) and fixVersion='
    + fix_version, maxResults=False)

for issue in all_issues_with_fix_version:
    if issue.key not in issue_set_from_commit_msg:
        print(issue.key + ' is marked resolved with fixVersion ' + fix_version
              + ' but no corresponding commit found')

print('Completed diff: ##############################################')

View File

@ -0,0 +1,18 @@
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
jira==3.1.1

201
dev-support/hadoop-vote.sh Executable file
View File

@ -0,0 +1,201 @@
#!/usr/bin/env bash
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# This script is useful to perform basic sanity tests for the given
# Hadoop RC. It checks for the Checksum, Signature, Rat check,
# Build from source and building tarball from the source.
# Abort on the first failing command and let a pipeline fail when any of its stages fails.
set -e -o pipefail

# Prints the command-line help text to stdout.
usage() {
  # Use only element 0 (this script): inside a function BASH_SOURCE holds one entry per
  # call-stack frame, and expanding the whole array handed basename several operands.
  SCRIPT=$(basename "${BASH_SOURCE[0]}")
  cat << __EOF
hadoop-vote. A script for standard vote which verifies the following items
1. Checksum of sources and binaries
2. Signature of sources and binaries
3. Rat check
4. Built from source
5. Built tar from source
Usage: ${SCRIPT} -s | --source <url> [-k | --key <signature>] [-f | --keys-file-url <url>] [-o | --output-dir </path/to/use>] [-D property[=value]] [-P profiles]
${SCRIPT} -h | --help
-h | --help Show this screen.
-s | --source '<url>' A URL pointing to the release candidate sources and binaries
e.g. https://dist.apache.org/repos/dist/dev/hadoop/hadoop-<version>RC0/
-k | --key '<signature>' A signature of the public key, e.g. 9AD2AE49
-f | --keys-file-url '<url>' the URL of the key file, default is
https://downloads.apache.org/hadoop/common/KEYS
-o | --output-dir '</path>' directory which has the stdout and stderr of each verification target
-D | list of maven properties to set for the mvn invocations, e.g. <-D hbase.profile=2.0 -D skipTests> Defaults to unset
-P | list of maven profiles to set for the build from source, e.g. <-P native -P yarn-ui>
__EOF
}
# Maven arguments collected from the repeatable -D and -P options.
MVN_PROPERTIES=()
MVN_PROFILES=()

# Consume the command line one option (plus its value) at a time.
while ((${#})); do
  case "${1}" in
    -h | --help )
      usage; exit 0 ;;
    -s | --source )
      SOURCE_URL="${2}"; shift 2 ;;
    -k | --key )
      SIGNING_KEY="${2}"; shift 2 ;;
    -f | --keys-file-url )
      KEY_FILE_URL="${2}"; shift 2 ;;
    -o | --output-dir )
      OUTPUT_DIR="${2}"; shift 2 ;;
    -D )
      MVN_PROPERTIES+=("-D ${2}"); shift 2 ;;
    -P )
      MVN_PROFILES+=("-P ${2}"); shift 2 ;;
    * )
      usage >&2; exit 1 ;;
  esac
done

# Source url must be provided
if [ -z "${SOURCE_URL}" ]; then
  # Usage errors go to stderr, like the unknown-option branch above.
  usage >&2
  exit 1
fi

cat << __EOF
Although This tool helps verifying Hadoop RC build and unit tests,
operator may still consider verifying the following manually:
1. Verify the API compatibility report
2. Integration/performance/benchmark tests
3. Object store specific Integration tests against an endpoint
4. Verify overall unit test stability from Jenkins builds or locally
5. Other concerns if any
__EOF

# Make sure the URL ends with a slash, then take its last path component as the RC name.
[[ "${SOURCE_URL}" != */ ]] && SOURCE_URL="${SOURCE_URL}/"
HADOOP_RC_VERSION=$(tr "/" "\n" <<< "${SOURCE_URL}" | tail -n2)
# Strip the whole RC suffix, including multi-digit candidates such as -RC10.
HADOOP_VERSION=$(echo "${HADOOP_RC_VERSION}" | sed -e 's/-RC[0-9][0-9]*//g' | sed -e 's/hadoop-//g')
# Third space-separated field of the first `java -version` line, with the quotes removed.
JAVA_VERSION=$(java -version 2>&1 | cut -f3 -d' ' | head -n1 | sed -e 's/"//g')
OUTPUT_DIR="${OUTPUT_DIR:-$(pwd)}"

if [ ! -d "${OUTPUT_DIR}" ]; then
  echo "Output directory ${OUTPUT_DIR} does not exist, please create it before running this script."
  exit 1
fi

# Every verification target writes its log to "<prefix>_<target name>".
OUTPUT_PATH_PREFIX="${OUTPUT_DIR}"/"${HADOOP_RC_VERSION}"

# default value for verification targets, 0 = failed
SIGNATURE_PASSED=0
CHECKSUM_PASSED=0
RAT_CHECK_PASSED=0
BUILD_FROM_SOURCE_PASSED=0
BUILD_TAR_FROM_SOURCE_PASSED=0
# Downloads the KEYS file into ./KEYS, imports it into the gpg keyring and, when a
# signing key was given with -k, lists that key.
function download_and_import_keys() {
  # Fall back to the default KEYS location when no URL was supplied.
  : "${KEY_FILE_URL:=https://downloads.apache.org/hadoop/common/KEYS}"
  echo "Obtain and import the publisher key(s) from ${KEY_FILE_URL}"

  # download the keys file into file KEYS
  wget --output-document=KEYS "${KEY_FILE_URL}"
  gpg --import KEYS

  if [[ -n "${SIGNING_KEY}" ]]; then
    gpg --list-keys "${SIGNING_KEY}"
  fi
}
# Recursively mirrors everything below SOURCE_URL into the current directory.
function download_release_candidate () {
  # get all files from release candidate repo
  # (no parent directories, no host directory, first four path components dropped,
  # files re-fetched only when the remote copy is newer)
  wget --recursive --no-parent --timestamping --no-host-directories --cut-dirs=4 "${SOURCE_URL}"
}
# Verifies the detached .asc signature of every *.tar.gz in the current directory and
# appends the gpg output to "${OUTPUT_PATH_PREFIX}_verify_signatures".
# Sets SIGNATURE_PASSED=1 only when at least one tarball was found and all of them
# verified; otherwise SIGNATURE_PASSED=0.
function verify_signatures() {
  local file
  local checked=0
  local all_ok=1

  rm -f "${OUTPUT_PATH_PREFIX}"_verify_signatures
  for file in *.tar.gz; do
    # An unmatched glob stays literal; treat that as "nothing to verify".
    if [ ! -e "${file}" ]; then
      echo "No *.tar.gz files found to verify" | tee -a "${OUTPUT_PATH_PREFIX}"_verify_signatures
      continue
    fi
    checked=1
    # Remember any failure: a later good signature must not hide an earlier bad one.
    gpg --verify "${file}".asc "${file}" 2>&1 | tee -a "${OUTPUT_PATH_PREFIX}"_verify_signatures || all_ok=0
  done

  if ((checked)) && ((all_ok)); then
    SIGNATURE_PASSED=1
  else
    SIGNATURE_PASSED=0
  fi
}
# Recomputes the SHA-512 checksum (`sha512sum --tag`) of every *.tar.gz in the current
# directory, diffs it against the published "<file>.<SHA_EXT>" and appends the diff
# output to "${OUTPUT_PATH_PREFIX}_verify_checksums".
# Sets CHECKSUM_PASSED=1 only when at least one tarball was found and all of them
# matched; otherwise CHECKSUM_PASSED=0.
function verify_checksums() {
  local file
  local checked=0
  local all_ok=1

  rm -f "${OUTPUT_PATH_PREFIX}"_verify_checksums
  # Extension of the first checksum file found below the current directory.
  SHA_EXT=$(find . -name "*.sha*" | awk -F '.' '{ print $NF }' | head -n 1)
  for file in *.tar.gz; do
    # An unmatched glob stays literal; treat that as "nothing to verify".
    if [ ! -e "${file}" ]; then
      echo "No *.tar.gz files found to verify" | tee -a "${OUTPUT_PATH_PREFIX}"_verify_checksums
      continue
    fi
    checked=1
    sha512sum --tag "${file}" > "${file}"."${SHA_EXT}".tmp
    # Remember any mismatch: a later matching file must not hide an earlier failure.
    diff "${file}"."${SHA_EXT}".tmp "${file}"."${SHA_EXT}" 2>&1 | tee -a "${OUTPUT_PATH_PREFIX}"_verify_checksums || all_ok=0
    rm -f "${file}"."${SHA_EXT}".tmp
  done

  if ((checked)) && ((all_ok)); then
    CHECKSUM_PASSED=1
  else
    CHECKSUM_PASSED=0
  fi
}
# Extracts the source tarball and changes into the extracted source directory.
# Returns non-zero when either step fails.
function unzip_from_source() {
  # Chain with && so a failed extraction is reported: this function runs on the left
  # side of `||` in execute, where `set -e` does not stop at the failing tar.
  tar -zxvf hadoop-"${HADOOP_VERSION}"-src.tar.gz &&
    cd hadoop-"${HADOOP_VERSION}"-src
}
# Runs the Apache RAT check with the user supplied maven properties, copies the output
# to "${OUTPUT_PATH_PREFIX}_rat_test" and sets RAT_CHECK_PASSED=1 on success.
# On failure the status of the pipeline is returned.
function rat_test() {
  rm -f "${OUTPUT_PATH_PREFIX}"_rat_test
  if mvn clean apache-rat:check "${MVN_PROPERTIES[@]}" 2>&1 | tee "${OUTPUT_PATH_PREFIX}"_rat_test; then
    RAT_CHECK_PASSED=1
  else
    return $?
  fi
}
function build_from_source() {
  # Build and install from the unpacked source (unit tests are skipped), mirroring
  # the output into "<prefix>_build_from_source". BUILD_FROM_SOURCE_PASSED is raised
  # only when the pipeline reports success; otherwise its status is returned.
  local report="${OUTPUT_PATH_PREFIX}"_build_from_source
  rm -f "${report}"
  if mvn clean install "${MVN_PROPERTIES[@]}" -DskipTests "${MVN_PROFILES[@]}" 2>&1 | tee "${report}"; then
    BUILD_FROM_SOURCE_PASSED=1
  else
    return "$?"
  fi
}
function build_tar_from_source() {
  # Package the distribution tarball from source (unit tests and javadoc are skipped),
  # mirroring the output into "<prefix>_build_tar_from_source".
  # BUILD_TAR_FROM_SOURCE_PASSED is raised only when the pipeline reports success;
  # otherwise its status is returned.
  local report="${OUTPUT_PATH_PREFIX}"_build_tar_from_source
  rm -f "${report}"
  if mvn clean package "${MVN_PROPERTIES[@]}" -Pdist -DskipTests -Dtar -Dmaven.javadoc.skip=true 2>&1 \
      | tee "${report}"; then
    BUILD_TAR_FROM_SOURCE_PASSED=1
  else
    return "$?"
  fi
}
function execute() {
  # Run the step named by the first argument; when it reports failure, hand over
  # to print_when_exit (which prints the summary).
  if ! ${1}; then
    print_when_exit
  fi
}
function print_when_exit() {
  # Print the per-step summary and terminate the script: exit 0 when every
  # verification flag is non-zero, exit 1 otherwise.
  local signature="failed" checksum="failed" rat="failed" build="failed" tarball="failed"
  local exit_code=1

  # Translate each numeric flag into the word shown in the report.
  if ((SIGNATURE_PASSED)); then signature="ok"; fi
  if ((CHECKSUM_PASSED)); then checksum="ok"; fi
  if ((RAT_CHECK_PASSED)); then rat="ok"; fi
  if ((BUILD_FROM_SOURCE_PASSED)); then build="ok"; fi
  if ((BUILD_TAR_FROM_SOURCE_PASSED)); then tarball="ok"; fi

  cat << __EOF
* Signature: ${signature}
* Checksum : ${checksum}
* Rat check (${JAVA_VERSION}): ${rat}
- mvn clean apache-rat:check ${MVN_PROPERTIES[@]}
* Built from source (${JAVA_VERSION}): ${build}
- mvn clean install ${MVN_PROPERTIES[@]} -DskipTests ${MVN_PROFILES[@]}
* Built tar from source (${JAVA_VERSION}): ${tarball}
- mvn clean package ${MVN_PROPERTIES[@]} -Pdist -DskipTests -Dtar -Dmaven.javadoc.skip=true
__EOF

  if ((CHECKSUM_PASSED && SIGNATURE_PASSED && RAT_CHECK_PASSED \
      && BUILD_FROM_SOURCE_PASSED && BUILD_TAR_FROM_SOURCE_PASSED)); then
    exit_code=0
  fi
  exit "${exit_code}"
}
# Work inside the output directory: fetch the keys and the release candidate, then
# run each verification step in order through `execute`.
pushd "${OUTPUT_DIR}"
download_and_import_keys
download_release_candidate
for step in verify_signatures verify_checksums unzip_from_source \
    rat_test build_from_source build_tar_from_source; do
  execute "${step}"
done
popd
# Print the summary and exit with the overall result.
print_when_exit

250
dev-support/jenkins.sh Normal file
View File

@ -0,0 +1,250 @@
#!/usr/bin/env bash
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# This script is called from the Jenkinsfile, which ultimately runs
# the CI through Yetus.
# We use Ubuntu Focal as the main platform for building Hadoop, thus
# it runs for all the PRs. Additionally, we also ensure that
# Hadoop builds across the supported platforms whenever there's a change
# in any of the C/C++ files, C/C++ build files or platform changes.
## @description Check if the given extension is related to C/C++
## @param seeking
## @return 0 if yes
## @return 1 if no
is_c_cpp_extension() {
  # Compare the given extension against the known C/C++ source and header
  # extensions. All working variables are local so nothing leaks to the caller.
  local seeking=$1
  local known_extension
  local -a c_cpp_extensions=("c" "cc" "cpp" "h" "hpp")

  for known_extension in "${c_cpp_extensions[@]}"; do
    if [[ ${known_extension} == "${seeking}" ]]; then
      return 0
    fi
  done

  return 1
}
## @description Check if the given relative path corresponds to
## change in platform files
## @param in_path
## @return 0 if yes
## @return 1 if no
is_platform_change() {
  # Match the changed file (given relative to SOURCEDIR) against the platform
  # files: dev-support/docker/Dockerfile* and dev-support/docker/pkg-resolver/*.json.
  # The loop variable is local so it cannot overwrite a caller's variable.
  local in_path candidate
  in_path="${SOURCEDIR}"/"${1}"

  for candidate in "${SOURCEDIR}"/dev-support/docker/Dockerfile* "${SOURCEDIR}"/dev-support/docker/pkg-resolver/*.json; do
    if [ "${in_path}" == "${candidate}" ]; then
      echo "Found C/C++ platform related changes in ${in_path}"
      return 0
    fi
  done

  return 1
}
## @description Checks if the given path corresponds to a change
## in C/C++ files or related to C/C++ build system
## @param path
## @return 0 if yes
## @return 1 if no
is_c_cpp_change() {
  # Decide from the file name alone whether the path is a C/C++ source or header
  # (by extension) or a CMake build file. Matching is case-insensitive, but the
  # nocasematch option is switched back off afterwards if this function enabled it,
  # so it does not affect comparisons elsewhere in the script.
  local path=$1
  local filename
  local extension=""
  local restore_nocasematch=0
  local status=1

  if ! shopt -q nocasematch; then
    shopt -s nocasematch
    restore_nocasematch=1
  fi

  filename=$(basename -- "${path}")
  # Only a name that contains a dot has an extension; a dot-less file called
  # e.g. "c" or "h" is not a C/C++ source.
  if [[ ${filename} == *.* ]]; then
    extension=${filename##*.}
  fi

  if is_c_cpp_extension "${extension}"; then
    echo "Found C/C++ changes in ${path}"
    status=0
  elif [[ $filename =~ CMakeLists\.txt ]]; then
    echo "Found C/C++ build related changes in ${path}"
    status=0
  fi

  if ((restore_nocasematch)); then
    shopt -u nocasematch
  fi
  return "${status}"
}
## @description Check if the CI needs to be run - CI will always run if
## IS_OPTIONAL is 0, or if there's any change in
## C/C++ files or C/C++ build or platform
## @return 0 if yes
## @return 1 if no
function check_ci_run() {
  local trunk_commit changed_path

  # Commit that origin/trunk currently points at; the changes in HEAD are
  # listed relative to it.
  trunk_commit=$(git --git-dir "${SOURCEDIR}/.git" rev-parse origin/trunk)

  # Check every changed path against the criteria for running the CI. Paths are
  # read one per line, so names containing spaces or glob characters stay intact.
  # File descriptor 3 carries the list so the checks cannot consume it via stdin.
  while IFS= read -r changed_path <&3 || [[ -n "${changed_path}" ]]; do
    if is_c_cpp_change "${changed_path}"; then
      return 0
    fi

    if is_platform_change "${changed_path}"; then
      return 0
    fi
  done 3< <(git --git-dir "${SOURCEDIR}/.git" diff --name-only "${trunk_commit}" HEAD)

  # We must run the CI if it's not optional
  if [ "$IS_OPTIONAL" -eq 0 ]; then
    return 0
  fi
  return 1
}
## @description Run the CI using YETUS
function run_ci() {
  # The patch directory must start out empty on every run.
  if [[ -d "${PATCHDIR}" ]]; then
    rm -rf "${PATCHDIR:?}"
  fi
  mkdir -p "${PATCHDIR}"

  # With neither a JIRA issue nor a CHANGE_URL (e.g. from the Github Branch Source
  # plugin) there is nothing to test; leave a stub report and stop, because we
  # don't want Hadoop to do a full build.
  if [[ -z "${JIRA_ISSUE_KEY}" && -z "${CHANGE_URL}" ]]; then
    echo "Full build skipped" >"${PATCHDIR}/report.html"
    exit 0
  fi

  # A JIRA issue, when given, is passed on for processing.
  if [[ -n "${JIRA_ISSUE_KEY}" ]]; then
    YETUS_ARGS+=("${JIRA_ISSUE_KEY}")
  fi

  YETUS_ARGS+=(
    "--patch-dir=${PATCHDIR}"

    # where the source is located
    "--basedir=${SOURCEDIR}"

    # our project defaults come from a personality file
    "--project=hadoop"
    "--personality=${SOURCEDIR}/dev-support/bin/hadoop.sh"

    # lots of different output formats
    "--brief-report-file=${PATCHDIR}/brief.txt"
    "--console-report-file=${PATCHDIR}/console.txt"
    "--html-report-file=${PATCHDIR}/report.html"

    # enable writing back to Github
    "--github-token=${GITHUB_TOKEN}"

    # auto-kill any surefire stragglers during unit test runs
    "--reapermode=kill"

    # set relatively high limits for ASF machines
    # changing these to higher values may cause problems
    # with other jobs on systemd-enabled machines
    "--proclimit=5500"
    "--dockermemlimit=22g"

    # -1 spotbugs issues that show up prior to the patch being applied
    "--spotbugs-strict-precheck"

    # rsync these files back into the archive dir
    "--archive-list=checkstyle-errors.xml,spotbugsXml.xml"

    # URL for user-side presentation in reports and such to our artifacts
    # (needs to match the archive bits below)
    "--build-url-artifacts=artifact/out"

    # plugins to enable
    "--plugins=all,-jira"

    # don't let these tests cause -1s because we aren't really paying that
    # much attention to them
    "--tests-filter=checkstyle"

    # run in docker mode and specifically point to our
    # Dockerfile since we don't want to use the auto-pulled version.
    "--docker"
    "--dockerfile=${DOCKERFILE}"
    "--mvn-custom-repos"

    # effectively treat dev-support as a custom maven module
    "--skip-dirs=dev-support"

    # help keep the ASF boxes clean
    "--sentinel"

    # test with Java 8 and 11
    "--java-home=/usr/lib/jvm/java-8-openjdk-amd64"
    "--multijdkdirs=/usr/lib/jvm/java-11-openjdk-amd64"
    "--multijdktests=compile"

    # custom javadoc goals
    "--mvn-javadoc-goals=process-sources,javadoc:javadoc-no-fork"

    # write Yetus report as GitHub comment (YETUS-1102)
    "--github-write-comment"
    "--github-use-emoji-vote"
  )

  # Hand everything over to the Yetus test-patch driver.
  TESTPATCHBIN="${WORKSPACE}/${YETUS}/precommit/src/main/shell/test-patch.sh"
  "${TESTPATCHBIN}" "${YETUS_ARGS[@]}"
}
## @description Cleans up the processes started by YETUS
function cleanup_ci_proc() {
  local pid_file="${PATCHDIR}/pidfile.txt"
  local cid_file="${PATCHDIR}/cidfile.txt"

  # See YETUS-764
  # A leftover pid file means test-patch did not shut down: kill that process and
  # give it ten seconds. A failing kill is tolerated.
  if [ -f "${pid_file}" ]; then
    echo "test-patch process appears to still be running: killing"
    kill "$(<"${pid_file}")" || true
    sleep 10
  fi

  # Likewise, a leftover container id file means the container is still around.
  if [ -f "${cid_file}" ]; then
    echo "test-patch container appears to still be running: killing"
    docker kill "$(<"${cid_file}")" || true
  fi
}
## @description Invokes github_status_recovery in YETUS's precommit
function github_status_recovery() {
  # Run Yetus' github-status-recovery.sh with the GitHub token, the patch
  # directory and EXTRA_ARGS (passed as one argument). Its failure is ignored.
  TESTPATCHBIN="${WORKSPACE}/${YETUS}/precommit/src/main/shell/github-status-recovery.sh"
  YETUS_ARGS+=("--github-token=${GITHUB_TOKEN}" "--patch-dir=${PATCHDIR}")
  /usr/bin/env bash "${TESTPATCHBIN}" "${YETUS_ARGS[@]}" "${EXTRA_ARGS}" || true
}
# Process arguments to jenkins.sh: the first argument selects the action.
case "$1" in
  "")
    echo "Must specify an argument for jenkins.sh"
    echo "run_ci - Runs the CI based on platform image as defined by DOCKERFILE"
    echo "cleanup_ci_proc - Cleans up the processes spawned for running the CI"
    echo "github_status_recovery - Sends Github status (refer to YETUS precommit for more details)"
    exit 1
    ;;
  run_ci)
    # Check if the CI needs to be run, if so, do so :)
    if check_ci_run; then
      run_ci
    else
      echo "No C/C++ file or C/C++ build or platform changes found, will not run CI for this platform"
    fi
    ;;
  cleanup_ci_proc)
    cleanup_ci_proc
    ;;
  github_status_recovery)
    github_status_recovery
    ;;
  *)
    echo "Don't know how to process $1"
    exit 1
    ;;
esac

View File

@ -69,7 +69,9 @@
<!-- Checks for line length violations. -->
<!-- See https://checkstyle.sourceforge.io/config_sizes.html#LineLength -->
<module name="LineLength"/>
<module name="LineLength">
<property name="max" value="100"/>
</module>
<module name="TreeWalker">
@ -120,9 +122,8 @@
<!-- Checks for imports -->
<!-- See http://checkstyle.sf.net/config_import.html -->
<module name="IllegalImport">
<property name="regexp" value="true"/>
<property name="illegalPkgs" value="^sun\.[^.]+, ^com\.google\.common\.[^.]+"/>
<property name="illegalClasses" value="^org\.apache\.hadoop\.thirdparty\.com\.google\.common\.base\.(Optional|Function|Predicate|Supplier), ^org\.apache\.hadoop\.thirdparty\.com\.google\.common\.collect\.(ImmutableListMultimap)"/>
<property name="regexp" value="true"/>
<property name="illegalPkgs" value="sun"/>
</module>
<module name="RedundantImport"/>
<module name="UnusedImports"/>
@ -158,7 +159,9 @@
<!-- Checks for blocks. You know, those {}'s -->
<!-- See http://checkstyle.sf.net/config_blocks.html -->
<module name="AvoidNestedBlocks"/>
<module name="AvoidNestedBlocks">
<property name="allowInSwitchCase" value="true"/>
</module>
<module name="EmptyBlock"/>
<module name="LeftCurly"/>
<module name="NeedBraces"/>

View File

@ -67,6 +67,13 @@
</exclusion>
</exclusions>
</dependency>
<!-- snappy-java is native library and cannot be relocated. So we explicitly exclude it
from shaded jar to prevent possible conflict. Make it as transitive dependency to
make the downstream pull it. -->
<dependency>
<groupId>org.xerial.snappy</groupId>
<artifactId>snappy-java</artifactId>
</dependency>
</dependencies>
<profiles>
<profile>
@ -87,6 +94,10 @@
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-shade-plugin</artifactId>
<configuration>
<createSourcesJar>true</createSourcesJar>
<shadeSourcesContent>true</shadeSourcesContent>
</configuration>
<dependencies>
<dependency>
<groupId>org.apache.hadoop</groupId>
@ -105,6 +116,10 @@
<includes>
<include>org.apache.hadoop:*</include>
</includes>
<excludes>
<!-- Leave snappy that includes native methods which cannot be relocated. -->
<exclude>org.xerial.snappy:*</exclude>
</excludes>
</artifactSet>
<filters>
<!-- We get these package level classes from various yarn api jars -->
@ -126,9 +141,7 @@
<excludes>
<exclude>org/apache/hadoop/*</exclude>
<exclude>org/apache/hadoop/**/*</exclude>
<!-- Our non-shaded htrace and logging libraries -->
<exclude>org/apache/htrace/*</exclude>
<exclude>org/apache/htrace/**/*</exclude>
<!-- Our non-shaded logging libraries -->
<exclude>org/slf4j/*</exclude>
<exclude>org/slf4j/**/*</exclude>
<exclude>org/apache/commons/logging/*</exclude>
@ -145,6 +158,9 @@
<exclude>org/xml/sax/**/*</exclude>
<exclude>org/bouncycastle/*</exclude>
<exclude>org/bouncycastle/**/*</exclude>
<!-- Exclude snappy-java -->
<exclude>org/xerial/snappy/*</exclude>
<exclude>org/xerial/snappy/**/*</exclude>
</excludes>
</relocation>
<relocation>
@ -163,6 +179,8 @@
<exclude>com/sun/security/**/*</exclude>
<exclude>com/sun/jndi/**/*</exclude>
<exclude>com/sun/management/**/*</exclude>
<exclude>com/ibm/security/*</exclude>
<exclude>com/ibm/security/**/*</exclude>
</excludes>
</relocation>
<relocation>
@ -221,6 +239,9 @@
<!-- Exclude config keys for Hadoop that look like package names -->
<exclude>net/topology/*</exclude>
<exclude>net/topology/**/*</exclude>
<!-- Exclude lz4-java -->
<exclude>net/jpountz/*</exclude>
<exclude>net/jpountz/**/*</exclude>
</excludes>
</relocation>
<!-- okio declares a top level package instead of nested -->

View File

@ -56,7 +56,7 @@
<dependency>
<groupId>org.codehaus.mojo</groupId>
<artifactId>extra-enforcer-rules</artifactId>
<version>1.0-beta-3</version>
<version>1.5.1</version>
</dependency>
</dependencies>
<executions>
@ -80,8 +80,6 @@
but enforcer still sees it.
-->
<exclude>org.apache.hadoop:hadoop-annotations</exclude>
<!-- We leave HTrace as an unshaded dependnecy on purpose so that tracing within a JVM will work -->
<exclude>org.apache.htrace:htrace-core4</exclude>
<!-- Leave slf4j unshaded so downstream users can configure logging. -->
<exclude>org.slf4j:slf4j-api</exclude>
<!-- Leave commons-logging unshaded so downstream users can configure logging. -->
@ -92,6 +90,8 @@
<exclude>com.google.code.findbugs:jsr305</exclude>
<!-- Leave bouncycastle unshaded because it's signed with a special Oracle certificate so it can be a custom JCE security provider -->
<exclude>org.bouncycastle:*</exclude>
<!-- Leave snappy that includes native methods which cannot be relocated. -->
<exclude>org.xerial.snappy:*</exclude>
</excludes>
</banTransitiveDependencies>
<banDuplicateClasses>

View File

@ -67,6 +67,8 @@ allowed_expr+="|^krb5_udp-template.conf$"
# Jetty uses this style sheet for directory listings. TODO ensure our
# internal use of jetty disallows directory listings and remove this.
allowed_expr+="|^jetty-dir.css$"
# snappy-java is a native library, so we cannot relocate it under org/apache/hadoop.
allowed_expr+="|^org/xerial/"
allowed_expr+=")"
declare -i bad_artifacts=0

View File

@ -60,7 +60,7 @@
<dependency>
<groupId>org.codehaus.mojo</groupId>
<artifactId>extra-enforcer-rules</artifactId>
<version>1.0-beta-3</version>
<version>1.5.1</version>
</dependency>
</dependencies>
<executions>
@ -84,8 +84,6 @@
but enforcer still sees it.
-->
<exclude>org.apache.hadoop:hadoop-annotations</exclude>
<!-- We leave HTrace as an unshaded dependnecy on purpose so that tracing within a JVM will work -->
<exclude>org.apache.htrace:htrace-core4</exclude>
<!-- Leave slf4j unshaded so downstream users can configure logging. -->
<exclude>org.slf4j:slf4j-api</exclude>
<!-- Leave commons-logging unshaded so downstream users can configure logging. -->
@ -100,6 +98,8 @@
<exclude>com.google.code.findbugs:jsr305</exclude>
<!-- Leave bouncycastle unshaded because it's signed with a special Oracle certificate so it can be a custom JCE security provider -->
<exclude>org.bouncycastle:*</exclude>
<!-- Leave snappy that includes native methods which cannot be relocated. -->
<exclude>org.xerial.snappy:*</exclude>
</excludes>
</banTransitiveDependencies>
<banDuplicateClasses>

View File

@ -58,13 +58,6 @@ allowed_expr+="|^org.apache.hadoop.application-classloader.properties$"
allowed_expr+="|^java.policy$"
# * Used by javax.annotation
allowed_expr+="|^jndi.properties$"
# * allowing native libraries from rocksdb. Leaving native libraries as it is.
allowed_expr+="|^librocksdbjni-linux32.so"
allowed_expr+="|^librocksdbjni-linux64.so"
allowed_expr+="|^librocksdbjni-osx.jnilib"
allowed_expr+="|^librocksdbjni-win64.dll"
allowed_expr+="|^librocksdbjni-linux-ppc64le.so"
allowed_expr+=")"
declare -i bad_artifacts=0

View File

@ -52,6 +52,11 @@
<artifactId>junit</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.lz4</groupId>
<artifactId>lz4-java</artifactId>
<scope>test</scope>
</dependency>
</dependencies>
<profiles>
<profile>
@ -179,6 +184,12 @@
<artifactId>hadoop-hdfs</artifactId>
<scope>test</scope>
<type>test-jar</type>
<exclusions>
<exclusion>
<groupId>org.ow2.asm</groupId>
<artifactId>asm-commons</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
@ -186,6 +197,12 @@
<scope>test</scope>
<type>test-jar</type>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>
<scope>test</scope>
<type>test-jar</type>
</dependency>
</dependencies>
</profile>
</profiles>

View File

@ -0,0 +1,144 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*
*/
package org.apache.hadoop.example;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import java.io.*;
import java.util.Arrays;
import java.util.Random;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.CommonConfigurationKeys;
import org.apache.hadoop.io.DataInputBuffer;
import org.apache.hadoop.io.DataOutputBuffer;
import org.apache.hadoop.io.RandomDatum;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.CompressionInputStream;
import org.apache.hadoop.io.compress.CompressionOutputStream;
import org.apache.hadoop.io.compress.zlib.ZlibFactory;
import org.apache.hadoop.util.ReflectionUtils;
import org.junit.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* Ensure that we can perform codec operations given the API and runtime jars
* by performing some simple smoke tests.
*/
public class ITUseHadoopCodecs {

  private static final Logger LOG = LoggerFactory.getLogger(ITUseHadoopCodecs.class);

  /** Configuration handed to every codec created by these tests. */
  private final Configuration hadoopConf = new Configuration();
  /** Number of key/value records generated for the non-empty round trips. */
  private final int dataCount = 100;
  /** Seed for the record generator; drawn at random for each test instance. */
  private final int dataSeed = new Random().nextInt();

  /**
   * Round-trips empty and non-empty data through GzipCodec with the native
   * zlib flag switched off.
   */
  @Test
  public void testGzipCodec() throws IOException {
    ZlibFactory.setNativeZlibLoaded(false);
    assertFalse(ZlibFactory.isNativeZlibLoaded(hadoopConf));
    codecTest(hadoopConf, dataSeed, 0, "org.apache.hadoop.io.compress.GzipCodec");
    codecTest(hadoopConf, dataSeed, dataCount, "org.apache.hadoop.io.compress.GzipCodec");
  }

  /** Round-trips empty and non-empty data through SnappyCodec. */
  @Test
  public void testSnappyCodec() throws IOException {
    codecTest(hadoopConf, dataSeed, 0, "org.apache.hadoop.io.compress.SnappyCodec");
    codecTest(hadoopConf, dataSeed, dataCount, "org.apache.hadoop.io.compress.SnappyCodec");
  }

  /**
   * Round-trips empty and non-empty data through Lz4Codec, once with the
   * "use LZ4HC" setting off and once with it on.
   */
  @Test
  public void testLz4Codec() throws IOException {
    for (boolean useLz4Hc : Arrays.asList(false, true)) {
      hadoopConf.setBoolean(
          CommonConfigurationKeys.IO_COMPRESSION_CODEC_LZ4_USELZ4HC_KEY,
          useLz4Hc);
      codecTest(hadoopConf, dataSeed, 0, "org.apache.hadoop.io.compress.Lz4Codec");
      codecTest(hadoopConf, dataSeed, dataCount, "org.apache.hadoop.io.compress.Lz4Codec");
    }
  }

  /**
   * Generates {@code count} random key/value records, compresses them with the
   * named codec, decompresses the result and checks it byte by byte against
   * the generated data.
   *
   * @param conf configuration used to load and instantiate the codec
   * @param seed seed for the random record generator
   * @param count number of key/value records to generate
   * @param codecClass fully qualified class name of the codec under test
   * @throws IOException if the codec class cannot be found (the
   *     {@link ClassNotFoundException} is kept as the cause) or if stream I/O fails
   */
  private void codecTest(Configuration conf, int seed, int count, String codecClass)
      throws IOException {

    // Create the codec
    CompressionCodec codec;
    try {
      codec = (CompressionCodec)
          ReflectionUtils.newInstance(conf.getClassByName(codecClass), conf);
    } catch (ClassNotFoundException cnfe) {
      // Keep the original exception so the missing class is visible in the trace.
      throw new IOException("Illegal codec!", cnfe);
    }
    LOG.info("Created a Codec object of type: {}", codecClass);

    // Generate data
    DataOutputBuffer data = new DataOutputBuffer();
    RandomDatum.Generator generator = new RandomDatum.Generator(seed);
    for (int i = 0; i < count; ++i) {
      generator.next();
      RandomDatum key = generator.getKey();
      RandomDatum value = generator.getValue();

      key.write(data);
      value.write(data);
    }
    LOG.info("Generated {} records", count);

    // Compress data
    DataOutputBuffer compressedDataBuffer = new DataOutputBuffer();
    try (CompressionOutputStream deflateFilter =
             codec.createOutputStream(compressedDataBuffer);
         DataOutputStream deflateOut =
             new DataOutputStream(new BufferedOutputStream(deflateFilter))) {
      deflateOut.write(data.getData(), 0, data.getLength());
      deflateOut.flush();
      deflateFilter.finish();
    }

    // De-compress data
    DataInputBuffer deCompressedDataBuffer = new DataInputBuffer();
    deCompressedDataBuffer.reset(compressedDataBuffer.getData(), 0,
        compressedDataBuffer.getLength());
    DataInputBuffer originalData = new DataInputBuffer();
    originalData.reset(data.getData(), 0, data.getLength());
    try (CompressionInputStream inflateFilter =
             codec.createInputStream(deCompressedDataBuffer);
         DataInputStream originalIn =
             new DataInputStream(new BufferedInputStream(originalData))) {

      // Check: both streams must yield the same bytes and hit end-of-stream (-1) together.
      int expected;
      do {
        expected = originalIn.read();
        assertEquals("Inflated stream read by byte does not match",
            expected, inflateFilter.read());
      } while (expected != -1);
    }

    LOG.info("SUCCESS! Completed checking {} records", count);
  }
}

View File

@ -40,6 +40,12 @@
<artifactId>hadoop-client-api</artifactId>
<scope>runtime</scope>
</dependency>
<!-- This is the api's compile dependency, but we don't want it to be compile dependency here too. -->
<dependency>
<groupId>org.xerial.snappy</groupId>
<artifactId>snappy-java</artifactId>
<scope>runtime</scope>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-client-runtime</artifactId>
@ -326,6 +332,10 @@
<groupId>org.apache.hadoop.thirdparty</groupId>
<artifactId>hadoop-shaded-guava</artifactId>
</exclusion>
<exclusion>
<groupId>org.ow2.asm</groupId>
<artifactId>asm-commons</artifactId>
</exclusion>
</exclusions>
</dependency>
<!-- Add optional runtime dependency on the in-development timeline server module
@ -397,8 +407,8 @@
<!-- Skip commons-logging:commons-logging-api because it looks like nothing actually included it -->
<!-- Skip jetty-util because it's in client -->
<dependency>
<groupId>com.sun.jersey</groupId>
<artifactId>jersey-core</artifactId>
<groupId>com.sun.jersey</groupId>
<artifactId>jersey-core</artifactId>
<optional>true</optional>
<exclusions>
<exclusion>
@ -445,9 +455,19 @@
<optional>true</optional>
</dependency>
<dependency>
<groupId>com.sun.jersey</groupId>
<artifactId>jersey-servlet</artifactId>
<groupId>com.sun.jersey</groupId>
<artifactId>jersey-servlet</artifactId>
<optional>true</optional>
<exclusions>
<exclusion>
<groupId>javax.servlet</groupId>
<artifactId>servlet-api</artifactId>
</exclusion>
<exclusion>
<groupId>javax.enterprise</groupId>
<artifactId>cdi-api</artifactId>
</exclusion>
</exclusions>
</dependency>
<!-- skip org.apache.avro:avro-ipc because it doesn't look like hadoop-common actually uses it -->
<dependency>
@ -672,7 +692,6 @@
<exclude>org.apache.hadoop:hadoop-client-api</exclude>
<exclude>org.apache.hadoop:hadoop-client-runtime</exclude>
<!-- Fine to expose our purposefully not-shaded deps as dependencies -->
<exclude>org.apache.htrace:htrace-core4</exclude>
<exclude>org.slf4j:slf4j-api</exclude>
<exclude>commons-logging:commons-logging</exclude>
<exclude>junit:junit</exclude>
@ -683,6 +702,9 @@
<!-- We need a filter that matches just those things that are included in the above artifacts -->
<!-- Leave bouncycastle unshaded because it's signed with a special Oracle certificate so it can be a custom JCE security provider -->
<exclude>org.bouncycastle:*</exclude>
<!-- Leave snappy that includes native methods which cannot be relocated. -->
<exclude>org.xerial.snappy:*</exclude>
<exclude>javax.ws.rs:javax.ws.rs-api</exclude>
</excludes>
</artifactSet>
<filters>
@ -729,6 +751,12 @@
<exclude>testdata/*</exclude>
</excludes>
</filter>
<filter>
<artifact>com.fasterxml.jackson.*:*</artifact>
<excludes>
<exclude>META-INF/versions/11/module-info.class</exclude>
</excludes>
</filter>
<!-- Mockito tries to include its own unrelocated copy of hamcrest. :( -->
<filter>
@ -761,13 +789,6 @@
<exclude>xml.xsd</exclude>
</excludes>
</filter>
<!-- filtering HISTORY-JAVA.md from rocksdb jar -->
<filter>
<artifact>org.rocksdb:rocksdbjni</artifact>
<excludes>
<exclude>HISTORY-JAVA.md</exclude>
</excludes>
</filter>
<filter>
<!-- skip jetty license info already incorporated into LICENSE/NOTICE -->
<artifact>org.eclipse.jetty:*</artifact>
@ -840,6 +861,18 @@
<exclude>*/**</exclude>
</excludes>
</filter>
<filter>
<artifact>org.eclipse.jetty:jetty-util-ajax</artifact>
<excludes>
<exclude>*/**</exclude>
</excludes>
</filter>
<filter>
<artifact>org.eclipse.jetty:jetty-server</artifact>
<excludes>
<exclude>jetty-dir.css</exclude>
</excludes>
</filter>
</filters>
<!-- relocate classes from mssql-jdbc -->
@ -858,9 +891,7 @@
<excludes>
<exclude>org/apache/hadoop/*</exclude>
<exclude>org/apache/hadoop/**/*</exclude>
<!-- Our non-shaded htrace and logging libraries -->
<exclude>org/apache/htrace/*</exclude>
<exclude>org/apache/htrace/**/*</exclude>
<!-- Our non-shaded logging libraries -->
<exclude>org/slf4j/*</exclude>
<exclude>org/slf4j/**/*</exclude>
<exclude>org/apache/commons/logging/*</exclude>
@ -881,6 +912,9 @@
<exclude>org/xml/sax/**/*</exclude>
<exclude>org/bouncycastle/*</exclude>
<exclude>org/bouncycastle/**/*</exclude>
<!-- Exclude snappy-java -->
<exclude>org/xerial/snappy/*</exclude>
<exclude>org/xerial/snappy/**/*</exclude>
</excludes>
</relocation>
<relocation>
@ -906,6 +940,8 @@
<exclude>com/sun/security/**/*</exclude>
<exclude>com/sun/jndi/**/*</exclude>
<exclude>com/sun/management/**/*</exclude>
<exclude>com/ibm/security/*</exclude>
<exclude>com/ibm/security/**/*</exclude>
</excludes>
</relocation>
<relocation>
@ -999,6 +1035,9 @@
<!-- Exclude config keys for Hadoop that look like package names -->
<exclude>net/topology/*</exclude>
<exclude>net/topology/**/*</exclude>
<!-- Exclude lz4-java -->
<exclude>net/jpountz/*</exclude>
<exclude>net/jpountz/**/*</exclude>
</excludes>
</relocation>
<!-- okio declares a top level package instead of nested -->

View File

@ -60,6 +60,12 @@
<artifactId>hadoop-client-api</artifactId>
<scope>runtime</scope>
</dependency>
<!-- This is the api's compile dependency, but we don't want it to be compile dependency here too. -->
<dependency>
<groupId>org.xerial.snappy</groupId>
<artifactId>snappy-java</artifactId>
<scope>runtime</scope>
</dependency>
<!-- This comes from our parent pom. If we don't expressly change it here to get included,
downstream will get warnings at compile time. -->
<dependency>
@ -75,15 +81,9 @@
</dependency>
<!-- Since hadoop-client is listed as optional, we have to list transitive
dependencies that we still want to show up.
* HTrace
* Slf4j API
* commons-logging
-->
<dependency>
<groupId>org.apache.htrace</groupId>
<artifactId>htrace-core4</artifactId>
<scope>runtime</scope>
</dependency>
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
@ -146,8 +146,6 @@
<excludes>
<!-- We need a filter that matches just those things that are included in the api jar -->
<exclude>org.apache.hadoop:hadoop-client-api</exclude>
<!-- Leave HTrace as an unshaded dependency on purpose, since a static class member is used to trace within a given JVM instance -->
<exclude>org.apache.htrace:htrace-core4</exclude>
<!-- Leave slf4j unshaded so downstream users can configure logging. -->
<exclude>org.slf4j:slf4j-api</exclude>
<!-- Leave commons-logging unshaded so downstream users can configure logging. -->
@ -163,6 +161,9 @@
<exclude>org.ow2.asm:*</exclude>
<!-- Leave bouncycastle unshaded because it's signed with a special Oracle certificate so it can be a custom JCE security provider -->
<exclude>org.bouncycastle:*</exclude>
<!-- Leave snappy that includes native methods which cannot be relocated. -->
<exclude>org.xerial.snappy:*</exclude>
<exclude>javax.ws.rs:javax.ws.rs-api</exclude>
</excludes>
</artifactSet>
<filters>
@ -242,6 +243,12 @@
<exclude>google/protobuf/**/*.proto</exclude>
</excludes>
</filter>
<filter>
<artifact>com.fasterxml.jackson.*:*</artifact>
<excludes>
<exclude>META-INF/versions/11/module-info.class</exclude>
</excludes>
</filter>
</filters>
<relocations>
<relocation>
@ -250,9 +257,7 @@
<excludes>
<exclude>org/apache/hadoop/*</exclude>
<exclude>org/apache/hadoop/**/*</exclude>
<!-- Our non-shaded htrace and logging libraries -->
<exclude>org/apache/htrace/*</exclude>
<exclude>org/apache/htrace/**/*</exclude>
<!-- Our non-shaded logging libraries -->
<exclude>org/slf4j/*</exclude>
<exclude>org/slf4j/**/*</exclude>
<exclude>org/apache/commons/logging/*</exclude>
@ -269,6 +274,9 @@
<exclude>org/xml/sax/**/*</exclude>
<exclude>org/bouncycastle/*</exclude>
<exclude>org/bouncycastle/**/*</exclude>
<!-- Exclude snappy-java -->
<exclude>org/xerial/snappy/*</exclude>
<exclude>org/xerial/snappy/**/*</exclude>
</excludes>
</relocation>
<relocation>
@ -287,6 +295,8 @@
<exclude>com/sun/security/**/*</exclude>
<exclude>com/sun/jndi/**/*</exclude>
<exclude>com/sun/management/**/*</exclude>
<exclude>com/ibm/security/*</exclude>
<exclude>com/ibm/security/**/*</exclude>
</excludes>
</relocation>
<relocation>
@ -359,6 +369,9 @@
<!-- Exclude config keys for Hadoop that look like package names -->
<exclude>net/topology/*</exclude>
<exclude>net/topology/**/*</exclude>
<!-- Exclude lz4-java -->
<exclude>net/jpountz/*</exclude>
<exclude>net/jpountz/**/*</exclude>
</excludes>
</relocation>
<!-- okio declares a top level package instead of nested -->

View File

@ -101,6 +101,10 @@
<groupId>org.apache.zookeeper</groupId>
<artifactId>zookeeper</artifactId>
</exclusion>
<exclusion>
<groupId>org.projectlombok</groupId>
<artifactId>lombok</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
@ -133,5 +137,10 @@
<artifactId>hadoop-cos</artifactId>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-huaweicloud</artifactId>
<scope>compile</scope>
</dependency>
</dependencies>
</project>

View File

@ -64,10 +64,9 @@
<build>
<plugins>
<plugin>
<groupId>org.codehaus.mojo</groupId>
<artifactId>findbugs-maven-plugin</artifactId>
<groupId>com.github.spotbugs</groupId>
<artifactId>spotbugs-maven-plugin</artifactId>
<configuration>
<findbugsXmlOutput>true</findbugsXmlOutput>
<xmlOutput>true</xmlOutput>
<excludeFilterFile>${basedir}/dev-support/findbugs-exclude.xml
</excludeFilterFile>

View File

@ -28,11 +28,11 @@ import java.util.Map;
import java.util.HashMap;
import java.util.Set;
import java.util.TreeSet;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.TimeUnit;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.hadoop.thirdparty.com.google.common.util.concurrent.ListeningExecutorService;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
@ -71,8 +71,8 @@ public class CosNFileSystem extends FileSystem {
private String owner = "Unknown";
private String group = "Unknown";
private ListeningExecutorService boundedIOThreadPool;
private ListeningExecutorService boundedCopyThreadPool;
private ExecutorService boundedIOThreadPool;
private ExecutorService boundedCopyThreadPool;
public CosNFileSystem() {
}

View File

@ -24,7 +24,7 @@ import java.util.List;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicInteger;
import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions;
import org.apache.hadoop.util.Preconditions;
import com.qcloud.cos.auth.AnonymousCOSCredentials;
import com.qcloud.cos.auth.COSCredentials;
import com.qcloud.cos.auth.COSCredentialsProvider;

View File

@ -0,0 +1,18 @@
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<FindBugsFilter>
</FindBugsFilter>

View File

@ -0,0 +1,191 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.-->
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-project</artifactId>
<version>3.4.0-SNAPSHOT</version>
<relativePath>../../hadoop-project</relativePath>
</parent>
<artifactId>hadoop-huaweicloud</artifactId>
<version>3.4.0-SNAPSHOT</version>
<name>Apache Hadoop OBS support</name>
<description>
This module contains code to support integration with OBS.
It also declares the dependencies needed to work with OBS services.
</description>
<packaging>jar</packaging>
<properties>
<file.encoding>UTF-8</file.encoding>
<downloadSources>true</downloadSources>
<esdk.version>3.20.4.2</esdk.version>
</properties>
<profiles>
<profile>
<id>tests-off</id>
<activation>
<file>
<missing>src/test/resources/auth-keys.xml</missing>
</file>
</activation>
<properties>
<maven.test.skip>true</maven.test.skip>
</properties>
</profile>
<profile>
<id>tests-on</id>
<activation>
<file>
<exists>src/test/resources/auth-keys.xml</exists>
</file>
</activation>
<properties>
<maven.test.skip>false</maven.test.skip>
</properties>
</profile>
</profiles>
<build>
<plugins>
<plugin>
<groupId>com.github.spotbugs</groupId>
<artifactId>spotbugs-maven-plugin</artifactId>
<configuration>
<xmlOutput>true</xmlOutput>
<excludeFilterFile>${basedir}/dev-support/findbugs-exclude.xml
</excludeFilterFile>
<effort>Max</effort>
</configuration>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-surefire-plugin</artifactId>
<configuration>
<forkedProcessTimeoutInSeconds>3600</forkedProcessTimeoutInSeconds>
</configuration>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-dependency-plugin</artifactId>
<executions>
<execution>
<id>deplist</id>
<phase>compile</phase>
<goals>
<goal>list</goal>
</goals>
<configuration>
<outputFile>${project.basedir}/target/hadoop-cloud-storage-deps/${project.artifactId}.cloud-storage-optional.txt</outputFile>
</configuration>
</execution>
</executions>
</plugin>
</plugins>
</build>
<dependencies>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>
<scope>provided</scope>
<exclusions>
<exclusion>
<groupId>jdk.tools</groupId>
<artifactId>jdk.tools</artifactId>
</exclusion>
<exclusion>
<groupId>org.javassist</groupId>
<artifactId>javassist</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>
<scope>test</scope>
<type>test-jar</type>
</dependency>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>${junit.version}</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.mockito</groupId>
<artifactId>mockito-all</artifactId>
<version>1.10.19</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-mapreduce-client-jobclient</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-yarn-server-tests</artifactId>
<scope>test</scope>
<type>test-jar</type>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-mapreduce-examples</artifactId>
<scope>test</scope>
<type>jar</type>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-distcp</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-distcp</artifactId>
<scope>test</scope>
<type>test-jar</type>
</dependency>
<dependency>
<groupId>com.huaweicloud</groupId>
<artifactId>esdk-obs-java</artifactId>
<version>${esdk.version}</version>
<exclusions>
<exclusion>
<artifactId>okio</artifactId>
<groupId>com.squareup.okio</groupId>
</exclusion>
<exclusion>
<artifactId>log4j-core</artifactId>
<groupId>org.apache.logging.log4j</groupId>
</exclusion>
<exclusion>
<artifactId>log4j-api</artifactId>
<groupId>org.apache.logging.log4j</groupId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.powermock</groupId>
<artifactId>powermock-api-mockito</artifactId>
<version>1.7.4</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.powermock</groupId>
<artifactId>powermock-module-junit4</artifactId>
<version>1.7.4</version>
<scope>test</scope>
</dependency>
</dependencies>
</project>

View File

@ -0,0 +1,45 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.fs.obs;
/**
* Source of the credentials used to build an OBS client: an access key, a
* secret key and an optional session token.
*
* <p>{@code DefaultOBSClientFactory} instantiates the configured
* implementation reflectively through a constructor taking
* {@code (java.net.URI, org.apache.hadoop.conf.Configuration)}, so
* implementations need to declare such a constructor.
*/
public interface BasicSessionCredential {
/**
* Get OBS access key.
*
* @return OBS access key
*/
String getOBSAccessKeyId();
/**
* Get OBS secret key.
*
* @return OBS secret key
*/
String getOBSSecretKey();
/**
* Get session token.
*
* <p>A {@code null} or empty token makes {@code DefaultOBSClientFactory}
* create the client from the access key and secret key only.
*
* @return session token; may be {@code null} or empty
*/
String getSessionToken();
}

View File

@ -0,0 +1,361 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.fs.obs;
import com.obs.services.IObsCredentialsProvider;
import com.obs.services.ObsClient;
import com.obs.services.ObsConfiguration;
import com.obs.services.internal.ext.ExtObsConfiguration;
import com.obs.services.model.AuthTypeEnum;
import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.lang.reflect.Constructor;
import java.lang.reflect.InvocationTargetException;
import java.net.URI;
import java.util.Optional;
/**
* The default factory implementation, which calls the OBS SDK to configure and
* create an {@link ObsClient} that communicates with the OBS service.
*/
class DefaultOBSClientFactory extends Configured implements OBSClientFactory {
/**
* Class logger.
*/
private static final Logger LOG = LoggerFactory.getLogger(
DefaultOBSClientFactory.class);
/**
 * Transfers the connection-management options from the Hadoop configuration
 * onto the OBS SDK configuration.
 *
 * <p>The options cover connection pool sizing, timeouts, buffer sizes,
 * certificate/hostname checks, auth type negotiation and the SDK retry
 * switches. Options are applied in a fixed order because the auth type and
 * the retry count depend on values set earlier in this method.
 *
 * @param conf Hadoop configuration to read from
 * @param obsConf OBS SDK configuration to populate
 */
@SuppressWarnings("deprecation")
private static void initConnectionSettings(final Configuration conf,
    final ExtObsConfiguration obsConf) {
  // ---- connection pool and transport security ----
  final int maxConnections = OBSCommonUtils.intOption(conf,
      OBSConstants.MAXIMUM_CONNECTIONS,
      OBSConstants.DEFAULT_MAXIMUM_CONNECTIONS, 1);
  obsConf.setMaxConnections(maxConnections);

  final boolean httpsOnly = conf.getBoolean(
      OBSConstants.SECURE_CONNECTIONS,
      OBSConstants.DEFAULT_SECURE_CONNECTIONS);
  obsConf.setHttpsOnly(httpsOnly);

  // ---- retries and timeouts ----
  final int maxErrorRetry = OBSCommonUtils.intOption(conf,
      OBSConstants.MAX_ERROR_RETRIES,
      OBSConstants.DEFAULT_MAX_ERROR_RETRIES, 0);
  obsConf.setMaxErrorRetry(maxErrorRetry);

  final int establishTimeout = OBSCommonUtils.intOption(conf,
      OBSConstants.ESTABLISH_TIMEOUT,
      OBSConstants.DEFAULT_ESTABLISH_TIMEOUT, 0);
  obsConf.setConnectionTimeout(establishTimeout);

  final int socketTimeout = OBSCommonUtils.intOption(conf,
      OBSConstants.SOCKET_TIMEOUT,
      OBSConstants.DEFAULT_SOCKET_TIMEOUT, 0);
  obsConf.setSocketTimeout(socketTimeout);

  final int idleConnectionTime = OBSCommonUtils.intOption(conf,
      OBSConstants.IDLE_CONNECTION_TIME,
      OBSConstants.DEFAULT_IDLE_CONNECTION_TIME, 1);
  obsConf.setIdleConnectionTime(idleConnectionTime);

  final int maxIdleConnections = OBSCommonUtils.intOption(conf,
      OBSConstants.MAX_IDLE_CONNECTIONS,
      OBSConstants.DEFAULT_MAX_IDLE_CONNECTIONS, 1);
  obsConf.setMaxIdleConnections(maxIdleConnections);

  // ---- buffer sizes ----
  // NOTE: the -1 bound on the read/write buffers was flagged
  // "to be modified" by the original author.
  final int readBufferSize = OBSCommonUtils.intOption(conf,
      OBSConstants.READ_BUFFER_SIZE,
      OBSConstants.DEFAULT_READ_BUFFER_SIZE, -1);
  obsConf.setReadBufferSize(readBufferSize);

  final int writeBufferSize = OBSCommonUtils.intOption(conf,
      OBSConstants.WRITE_BUFFER_SIZE,
      OBSConstants.DEFAULT_WRITE_BUFFER_SIZE, -1);
  obsConf.setWriteBufferSize(writeBufferSize);

  final int uploadStreamRetrySize = OBSCommonUtils.intOption(conf,
      OBSConstants.UPLOAD_STREAM_RETRY_SIZE,
      OBSConstants.DEFAULT_UPLOAD_STREAM_RETRY_SIZE, 1);
  obsConf.setUploadStreamRetryBufferSize(uploadStreamRetrySize);

  final int socketRecvBuffer = OBSCommonUtils.intOption(conf,
      OBSConstants.SOCKET_RECV_BUFFER,
      OBSConstants.DEFAULT_SOCKET_RECV_BUFFER, -1);
  obsConf.setSocketReadBufferSize(socketRecvBuffer);

  final int socketSendBuffer = OBSCommonUtils.intOption(conf,
      OBSConstants.SOCKET_SEND_BUFFER,
      OBSConstants.DEFAULT_SOCKET_SEND_BUFFER, -1);
  obsConf.setSocketWriteBufferSize(socketSendBuffer);

  // ---- boolean switches ----
  final boolean keepAlive = conf.getBoolean(OBSConstants.KEEP_ALIVE,
      OBSConstants.DEFAULT_KEEP_ALIVE);
  obsConf.setKeepAlive(keepAlive);

  final boolean validateCertificate = conf.getBoolean(
      OBSConstants.VALIDATE_CERTIFICATE,
      OBSConstants.DEFAULT_VALIDATE_CERTIFICATE);
  obsConf.setValidateCertificate(validateCertificate);

  final boolean verifyContentType = conf.getBoolean(
      OBSConstants.VERIFY_RESPONSE_CONTENT_TYPE,
      OBSConstants.DEFAULT_VERIFY_RESPONSE_CONTENT_TYPE);
  obsConf.setVerifyResponseContentType(verifyContentType);

  final boolean cname = conf.getBoolean(OBSConstants.CNAME,
      OBSConstants.DEFAULT_CNAME);
  obsConf.setCname(cname);

  final boolean strictHostname = conf.getBoolean(
      OBSConstants.STRICT_HOSTNAME_VERIFICATION,
      OBSConstants.DEFAULT_STRICT_HOSTNAME_VERIFICATION);
  obsConf.setIsStrictHostnameVerification(strictHostname);

  // ---- auth type ----
  final boolean negotiateAuthType = conf.getBoolean(
      OBSConstants.SDK_AUTH_TYPE_NEGOTIATION_ENABLE,
      OBSConstants.DEFAULT_SDK_AUTH_TYPE_NEGOTIATION_ENABLE);
  obsConf.setAuthTypeNegotiation(negotiateAuthType);
  if (!obsConf.isAuthTypeNegotiation()) {
    // Without negotiation the SDK auth type is pinned to OBS.
    obsConf.setAuthType(AuthTypeEnum.OBS);
  }

  // ---- SDK-level retries ----
  final boolean retryOnConnectionFailure = conf.getBoolean(
      OBSConstants.SDK_RETRY_ON_CONNECTION_FAILURE_ENABLE,
      OBSConstants.DEFAULT_SDK_RETRY_ON_CONNECTION_FAILURE_ENABLE);
  obsConf.retryOnConnectionFailureInOkhttp(retryOnConnectionFailure);

  // Max retries on an "unexpected end of stream" exception. The configured
  // value is replaced by DEFAULT_MAX_SDK_CONNECTION_RETRY_TIMES when it is
  // positive but below that constant, or when it is negative while the
  // okhttp connection-failure retry is switched off.
  int unexpectedEndRetries = conf.getInt(
      OBSConstants.SDK_RETRY_TIMES_ON_UNEXPECTED_END_EXCEPTION,
      OBSConstants.DEFAULT_SDK_RETRY_TIMES_ON_UNEXPECTED_END_EXCEPTION);
  final boolean positiveButBelowDefaultMax = unexpectedEndRetries > 0
      && unexpectedEndRetries
      < OBSConstants.DEFAULT_MAX_SDK_CONNECTION_RETRY_TIMES;
  if (positiveButBelowDefaultMax
      || (!obsConf.isRetryOnConnectionFailureInOkhttp()
      && unexpectedEndRetries < 0)) {
    unexpectedEndRetries =
        OBSConstants.DEFAULT_MAX_SDK_CONNECTION_RETRY_TIMES;
  }
  obsConf.setMaxRetryOnUnexpectedEndException(unexpectedEndRetries);
}
/**
 * Initializes OBS SDK proxy support if configured.
 *
 * <p>When a proxy host is configured without a port, the port falls back to
 * the HTTPS or HTTP default depending on
 * {@link OBSConstants#SECURE_CONNECTIONS}. The fallback is applied to the
 * port value that is handed to {@code setHttpProxy}, so it is not lost and
 * no proxy object has to exist before this method runs.
 *
 * @param conf Hadoop configuration
 * @param obsConf OBS SDK configuration
 * @throws IllegalArgumentException if only one of proxy username and proxy
 *                                  password is configured
 * @throws IOException on any failure to initialize proxy
 */
private static void initProxySupport(final Configuration conf,
    final ExtObsConfiguration obsConf)
    throws IllegalArgumentException, IOException {
  String proxyHost = conf.getTrimmed(OBSConstants.PROXY_HOST, "");
  int proxyPort = conf.getInt(OBSConstants.PROXY_PORT, -1);

  if (!proxyHost.isEmpty() && proxyPort < 0) {
    // Host without port: choose the default matching the connection type.
    // The default is stored in the local so that the setHttpProxy() call
    // below passes it on instead of the unset (negative) value.
    if (conf.getBoolean(OBSConstants.SECURE_CONNECTIONS,
        OBSConstants.DEFAULT_SECURE_CONNECTIONS)) {
      LOG.warn("Proxy host set without port. Using HTTPS default "
          + OBSConstants.DEFAULT_HTTPS_PORT);
      proxyPort = OBSConstants.DEFAULT_HTTPS_PORT;
    } else {
      LOG.warn("Proxy host set without port. Using HTTP default "
          + OBSConstants.DEFAULT_HTTP_PORT);
      proxyPort = OBSConstants.DEFAULT_HTTP_PORT;
    }
  }

  String proxyUsername = conf.getTrimmed(OBSConstants.PROXY_USERNAME);
  String proxyPassword = null;
  char[] proxyPass = conf.getPassword(OBSConstants.PROXY_PASSWORD);
  if (proxyPass != null) {
    proxyPassword = new String(proxyPass).trim();
  }

  // Username and password must be configured together or not at all.
  if ((proxyUsername == null) != (proxyPassword == null)) {
    String msg =
        "Proxy error: " + OBSConstants.PROXY_USERNAME + " or "
            + OBSConstants.PROXY_PASSWORD
            + " set without the other.";
    LOG.error(msg);
    throw new IllegalArgumentException(msg);
  }

  obsConf.setHttpProxy(proxyHost, proxyPort, proxyUsername,
      proxyPassword);
  if (LOG.isDebugEnabled()) {
    LOG.debug(
        "Using proxy server {}:{} as user {} on "
            + "domain {} as workstation {}",
        obsConf.getHttpProxy().getProxyAddr(),
        obsConf.getHttpProxy().getProxyPort(),
        obsConf.getHttpProxy().getProxyUName(),
        obsConf.getHttpProxy().getDomain(),
        obsConf.getHttpProxy().getWorkstation());
  }
}
/**
 * Creates an {@link ObsClient} from the established configuration.
 *
 * <p>If {@link OBSConstants#OBS_CREDENTIALS_PROVIDER} names a class, it is
 * instantiated through its {@code (URI, Configuration)} constructor and
 * must implement {@link BasicSessionCredential}; its access key, secret key
 * and optional session token are used to build the client. If the option is
 * unset, creation is delegated to
 * {@code createObsClientWithoutCredentialsProvider}.
 *
 * @param conf Hadoop configuration
 * @param obsConf ObsConfiguration
 * @param name URL
 * @return ObsClient client
 * @throws IOException on any failure to create Huawei OBS client, including
 *                     a provider class that cannot be loaded, cannot be
 *                     instantiated, or is not a
 *                     {@link BasicSessionCredential}
 */
private static ObsClient createHuaweiObsClient(final Configuration conf,
    final ObsConfiguration obsConf, final URI name)
    throws IOException {
  Class<?> credentialsProviderClass;
  BasicSessionCredential credentialsProvider;
  ObsClient obsClient;

  try {
    credentialsProviderClass = conf.getClass(
        OBSConstants.OBS_CREDENTIALS_PROVIDER, null);
  } catch (RuntimeException e) {
    Throwable c = e.getCause() != null ? e.getCause() : e;
    throw new IOException(
        "From option " + OBSConstants.OBS_CREDENTIALS_PROVIDER + ' '
            + c, c);
  }

  if (credentialsProviderClass == null) {
    return createObsClientWithoutCredentialsProvider(conf, obsConf,
        name);
  }

  try {
    Constructor<?> cons =
        credentialsProviderClass.getDeclaredConstructor(URI.class,
            Configuration.class);
    credentialsProvider = (BasicSessionCredential) cons.newInstance(
        name, conf);
  } catch (NoSuchMethodException
      | SecurityException
      | ClassCastException
      | IllegalAccessException
      | InstantiationException
      | InvocationTargetException e) {
    // ClassCastException: the configured class does not implement
    // BasicSessionCredential. Report it as a configuration failure like
    // the other cases rather than letting an unchecked exception escape.
    Throwable c = e.getCause() != null ? e.getCause() : e;
    throw new IOException(
        "From option " + OBSConstants.OBS_CREDENTIALS_PROVIDER + ' '
            + c, c);
  }

  String sessionToken = credentialsProvider.getSessionToken();
  String ak = credentialsProvider.getOBSAccessKeyId();
  String sk = credentialsProvider.getOBSSecretKey();
  String endPoint = conf.getTrimmed(OBSConstants.ENDPOINT, "");
  obsConf.setEndPoint(endPoint);

  // A missing or empty session token means plain ak/sk authentication.
  if (sessionToken != null && sessionToken.length() != 0) {
    obsClient = new ObsClient(ak, sk, sessionToken, obsConf);
  } else {
    obsClient = new ObsClient(ak, sk, obsConf);
  }
  return obsClient;
}
/**
 * Creates an {@link ObsClient} when no credentials provider class is
 * configured.
 *
 * <p>Resolution order:
 * <ol>
 *   <li>if an access key or a secret key is obtained from
 *   {@code OBSCommonUtils.getOBSAccessKeys}, build the client from those
 *   keys and the token;</li>
 *   <li>otherwise, if {@link OBSConstants#OBS_SECURITY_PROVIDER} is unset,
 *   build the client from the (empty) keys and token;</li>
 *   <li>otherwise instantiate the security provider class, preferring a
 *   {@code (URI, Configuration)} constructor over the no-arg one, and build
 *   the client from it.</li>
 * </ol>
 *
 * @param conf Hadoop configuration
 * @param obsConf OBS SDK configuration; its endpoint is set here
 * @param name URL
 * @return the created client
 * @throws IOException if the security provider cannot be loaded or
 *                     instantiated
 */
private static ObsClient createObsClientWithoutCredentialsProvider(
    final Configuration conf, final ObsConfiguration obsConf,
    final URI name) throws IOException {
  final OBSLoginHelper.Login login = OBSCommonUtils.getOBSAccessKeys(name,
      conf);
  final String accessKey = login.getUser();
  final String secretKey = login.getPassword();
  final String securityToken = login.getToken();

  obsConf.setEndPoint(conf.getTrimmed(OBSConstants.ENDPOINT, ""));

  // Either key being present is enough to use key-based authentication.
  final boolean noKeysConfigured =
      StringUtils.isEmpty(accessKey) && StringUtils.isEmpty(secretKey);
  if (!noKeysConfigured) {
    return new ObsClient(accessKey, secretKey, securityToken, obsConf);
  }

  Class<?> providerClass;
  try {
    providerClass = conf.getClass(OBSConstants.OBS_SECURITY_PROVIDER,
        null);
    LOG.info("From option {} get {}",
        OBSConstants.OBS_SECURITY_PROVIDER, providerClass);
  } catch (RuntimeException e) {
    final Throwable cause = e.getCause() != null ? e.getCause() : e;
    throw new IOException(
        "From option " + OBSConstants.OBS_SECURITY_PROVIDER + ' ' + cause,
        cause);
  }

  if (providerClass == null) {
    return new ObsClient(accessKey, secretKey, securityToken, obsConf);
  }

  IObsCredentialsProvider provider;
  try {
    final Optional<Constructor> uriConfConstructor = tryGetConstructor(
        providerClass,
        new Class[] {URI.class, Configuration.class});
    final Object instance;
    if (uriConfConstructor.isPresent()) {
      instance = uriConfConstructor.get().newInstance(name, conf);
    } else {
      // Fall back to the no-arg constructor.
      instance = providerClass.getDeclaredConstructor().newInstance();
    }
    provider = (IObsCredentialsProvider) instance;
  } catch (NoSuchMethodException
      | IllegalAccessException
      | InstantiationException
      | InvocationTargetException
      | RuntimeException e) {
    final Throwable cause = e.getCause() != null ? e.getCause() : e;
    throw new IOException(
        "From option " + OBSConstants.OBS_SECURITY_PROVIDER + ' ' + cause,
        cause);
  }
  return new ObsClient(provider, obsConf);
}
/**
 * Looks up a declared constructor without throwing when it is absent.
 *
 * @param mainClss class to inspect
 * @param args parameter types of the wanted constructor
 * @return the matching declared constructor, or an empty {@link Optional}
 *         if the class declares no constructor with those parameter types
 */
public static Optional<Constructor> tryGetConstructor(final Class mainClss,
    final Class[] args) {
  Optional<Constructor> lookup;
  try {
    lookup = Optional.ofNullable(mainClss.getDeclaredConstructor(args));
  } catch (NoSuchMethodException ignored) {
    // Absence is an expected outcome; report it as an empty Optional.
    lookup = Optional.empty();
  }
  return lookup;
}
/**
 * Builds an {@link ObsClient} for the given URI: a fresh SDK configuration
 * is populated with the connection and proxy settings taken from
 * {@link #getConf()} and then used to create the client.
 *
 * @param name URL
 * @return the created client
 * @throws IOException on any failure to configure or create the client
 */
@Override
public ObsClient createObsClient(final URI name) throws IOException {
  final Configuration hadoopConf = getConf();
  final ExtObsConfiguration sdkConf = new ExtObsConfiguration();

  initConnectionSettings(hadoopConf, sdkConf);
  initProxySupport(hadoopConf, sdkConf);

  return createHuaweiObsClient(hadoopConf, sdkConf, name);
}
}

View File

@ -16,23 +16,25 @@
* limitations under the License.
*/
package org.apache.hadoop.fs.s3a.scale;
import org.apache.hadoop.fs.s3a.s3guard.LocalMetadataStore;
import org.apache.hadoop.fs.s3a.s3guard.MetadataStore;
import org.apache.hadoop.fs.s3a.s3guard.S3Guard;
package org.apache.hadoop.fs.obs;
import java.io.IOException;
/**
* Scale test for LocalMetadataStore.
* OBS file conflict exception.
*/
public class ITestLocalMetadataStoreScale
extends AbstractITestS3AMetadataStoreScale {
@Override
public MetadataStore createMetadataStore() throws IOException {
MetadataStore ms = new LocalMetadataStore();
ms.initialize(getFileSystem(), new S3Guard.TtlTimeProvider(getConf()));
return ms;
class FileConflictException extends IOException {
// Fixed serialization id; the class is serializable through IOException.
private static final long serialVersionUID = -897856973823710492L;
/**
* Constructs a <code>FileConflictException</code> with the specified detail
* message. The string <code>s</code> can be retrieved later by the
* <code>{@link Throwable#getMessage}</code>
* method of class <code>java.lang.Throwable</code>.
*
* <p>The constructor is package-private, so instances can only be created
* from within this package.
*
* @param s the detail message.
*/
FileConflictException(final String s) {
super(s);
}
}

View File

@ -0,0 +1,53 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.fs.obs;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.DelegateToFileSystem;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
/**
* OBS implementation of AbstractFileSystem, which delegates to the {@link
* OBSFileSystem}.
*/
@InterfaceAudience.Public
@InterfaceStability.Evolving
public final class OBS extends DelegateToFileSystem {
/**
* Creates the delegating file system around a new {@link OBSFileSystem}
* instance, registered for the {@code "obs"} scheme.
*
* @param theUri URI of the file system
* @param conf Configuration for the file system
* @throws IOException on any failure to initialize this instance
* @throws URISyntaxException <code>theUri</code> has syntax error
*/
public OBS(final URI theUri, final Configuration conf)
throws IOException, URISyntaxException {
// NOTE(review): the trailing false is presumably the
// "authority required" flag of DelegateToFileSystem - confirm.
super(theUri, new OBSFileSystem(), conf, "obs", false);
}
/**
* Returns the default port for OBS URIs.
*
* @return {@link OBSConstants#OBS_DEFAULT_PORT}
*/
@Override
public int getUriDefaultPort() {
return OBSConstants.OBS_DEFAULT_PORT;
}
}

View File

@ -0,0 +1,814 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.fs.obs;
import org.apache.hadoop.classification.VisibleForTesting;
import org.apache.hadoop.util.Preconditions;
import org.apache.hadoop.thirdparty.com.google.common.util.concurrent.Futures;
import org.apache.hadoop.thirdparty.com.google.common.util.concurrent.ListenableFuture;
import org.apache.hadoop.thirdparty.com.google.common.util.concurrent.ListeningExecutorService;
import org.apache.hadoop.thirdparty.com.google.common.util.concurrent.MoreExecutors;
import com.obs.services.exception.ObsException;
import com.obs.services.model.CompleteMultipartUploadResult;
import com.obs.services.model.PartEtag;
import com.obs.services.model.PutObjectRequest;
import com.obs.services.model.UploadPartRequest;
import com.obs.services.model.UploadPartResult;
import com.obs.services.model.fs.WriteFileRequest;
import com.sun.istack.NotNull;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.fs.Syncable;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.atomic.AtomicBoolean;
/**
* OBS output stream based on block buffering.
* <p>
* Upload files/parts directly via different buffering mechanisms: including
* memory and disk.
*
* <p>If the stream is closed and no upload has started, then the upload is
* instead done as a single PUT operation.
*
* <p>Unstable: statistics and error handling might evolve.
*/
@InterfaceAudience.Private
@InterfaceStability.Unstable
class OBSBlockOutputStream extends OutputStream implements Syncable {
/**
* Class logger.
*/
private static final Logger LOG = LoggerFactory.getLogger(
OBSBlockOutputStream.class);
/**
* Owner FileSystem.
*/
private final OBSFileSystem fs;
/**
* Key of the object being uploaded.
*/
private final String key;
/**
* Length of object.
*/
private long objectLen;
/**
* Size of all blocks.
*/
private final int blockSize;
/**
* Executor service used to schedule the upload work.
*/
private final ListeningExecutorService executorService;
/**
* Factory for creating blocks.
*/
private final OBSDataBlocks.BlockFactory blockFactory;
/**
* Preallocated byte buffer for writing single characters.
*/
private final byte[] singleCharWrite = new byte[1];
/**
* Closed flag.
*/
private final AtomicBoolean closed = new AtomicBoolean(false);
/**
* Has exception flag.
*/
private final AtomicBoolean hasException = new AtomicBoolean(false);
/**
* Whether append is supported for this stream.
*/
private final AtomicBoolean appendAble;
/**
* Multipart upload details; null means none started.
*/
private MultiPartUpload multiPartUpload;
/**
* Current data block. Null means none currently active.
*/
private OBSDataBlocks.DataBlock activeBlock;
/**
* Count of blocks uploaded.
*/
private long blockCount = 0;
/**
* Write operation helper; encapsulation of the filesystem operations.
*/
private OBSWriteOperationHelper writeOperationHelper;
/**
* Flag for mocking upload part error.
*/
private boolean mockUploadPartError = false;
/**
 * Creates an OBS output stream which uploads partitions in a separate pool
 * of threads; different {@link OBSDataBlocks.BlockFactory} instances can
 * control where data is buffered.
 *
 * <p>The block factory, block size and write helper are taken from the
 * owning file system. The first block is created eagerly so that an
 * open + close sequence still writes a 0-byte entry.
 *
 * @param owner OBSFilesystem
 * @param obsObjectKey OBS object to work on
 * @param objLen object length
 * @param execService the executor service to use to schedule work
 * @param isAppendable if append is supported
 * @throws IOException on any problem
 */
OBSBlockOutputStream(
    final OBSFileSystem owner,
    final String obsObjectKey,
    final long objLen,
    final ExecutorService execService,
    final boolean isAppendable)
    throws IOException {
  // Plain state handed in by the caller.
  this.fs = owner;
  this.key = obsObjectKey;
  this.objectLen = objLen;
  this.appendAble = new AtomicBoolean(isAppendable);

  // State derived from the owning file system.
  this.blockFactory = owner.getBlockFactory();
  this.blockSize = (int) owner.getPartSize();
  this.writeOperationHelper = owner.getWriteHelper();
  Preconditions.checkArgument(
      owner.getPartSize() >= OBSConstants.MULTIPART_MIN_SIZE,
      "Block size is too small: %d", owner.getPartSize());

  this.executorService = MoreExecutors.listeningDecorator(execService);
  // No multipart upload is started until a block has to be uploaded.
  this.multiPartUpload = null;

  // Eagerly create the first block.
  createBlockIfNeeded();
  LOG.debug(
      "Initialized OBSBlockOutputStream for {} output to {}",
      owner.getWriteHelper(),
      activeBlock);
}
/**
 * Returns the active block, creating a new one on demand.
 *
 * <p>Creating a block increments the block count; a warning is logged once
 * that count reaches {@link OBSConstants#MAX_MULTIPART_COUNT}.
 *
 * @return the active block, newly created if there was none
 * @throws IOException on any failure to create
 */
private synchronized OBSDataBlocks.DataBlock createBlockIfNeeded()
    throws IOException {
  if (activeBlock != null) {
    return activeBlock;
  }

  blockCount++;
  if (blockCount >= OBSConstants.MAX_MULTIPART_COUNT) {
    LOG.warn(
        "Number of partitions in stream exceeds limit for OBS: "
            + OBSConstants.MAX_MULTIPART_COUNT
            + " write may fail.");
  }
  activeBlock = blockFactory.create(blockCount, this.blockSize);
  return activeBlock;
}
/**
* Synchronized accessor to the active block.
*
* <p>Does not create a block; use {@code createBlockIfNeeded()} for that.
*
* @return the active block; null if there isn't one.
*/
synchronized OBSDataBlocks.DataBlock getActiveBlock() {
return activeBlock;
}
/**
* Sets the flag used to mock an upload part error; intended for tests only.
*
* <p>NOTE(review): the code that reads this flag is outside this view -
* presumably the part upload path; confirm there.
*
* @param isException true to mock an upload part error
*/
@VisibleForTesting
public void mockPutPartError(final boolean isException) {
this.mockUploadPartError = isException;
}
/**
* Predicate to query whether or not there is an active block.
*
* <p>Synchronized on this stream, like the other accessors of the active
* block.
*
* @return true if there is an active block.
*/
private synchronized boolean hasActiveBlock() {
return activeBlock != null;
}
/**
 * Drops the reference to the active block, if any, so that the next write
 * creates a new block. Nothing happens when there is no active block.
 */
private synchronized void clearActiveBlock() {
  if (activeBlock == null) {
    // Already clear; nothing to log or reset.
    return;
  }
  LOG.debug("Clearing active block");
  activeBlock = null;
}
/**
 * Verifies that this stream has not been closed.
 *
 * @throws IOException if the closed flag of this stream is set
 */
private void checkOpen() throws IOException {
  if (!closed.get()) {
    return;
  }
  throw new IOException(
      "Filesystem " + writeOperationHelper.toString(key) + " closed");
}
/**
 * Flushes the current block, if there is one.
 *
 * <p>This does not trigger an upload; that awaits the next block being
 * full. The call is forwarded to {@code flush()} on the active block,
 * leaving it to choose how to react.
 *
 * @throws IOException if the stream is closed, or on any IO problem while
 *                     flushing the block
 */
@Override
public synchronized void flush() throws IOException {
  checkOpen();
  final OBSDataBlocks.DataBlock current = getActiveBlock();
  if (current == null) {
    return;
  }
  current.flush();
}
/**
* Writes a byte to the destination. If this causes the buffer to reach its
* limit, the actual upload is submitted to the threadpool.
*
* <p>The byte is staged in the preallocated one-element array and passed to
* {@link #write(byte[], int, int)}.
*
* @param b the int of which the lowest byte is written
* @throws IOException on any problem
*/
@Override
public synchronized void write(final int b) throws IOException {
singleCharWrite[0] = (byte) b;
write(singleCharWrite, 0, 1);
}
/**
 * Writes a range of bytes into the current block. If the block runs out of
 * capacity, it is handed over for upload (or append) and the remainder of
 * the array is written to a new block (recursively).
 *
 * <p>The call fails immediately if an earlier upload of this stream has
 * already recorded an error.
 *
 * @param source byte array containing the data
 * @param offset offset in array where to start
 * @param len number of bytes to be written
 * @throws IOException if a previous upload failed, the stream is closed, or
 *                     on any problem while writing
 */
@Override
public synchronized void write(@NotNull final byte[] source,
    final int offset, final int len)
    throws IOException {
  if (hasException.get()) {
    final String failure = String.format(
        "write has error. bs : pre upload obs[%s] has error.", key);
    LOG.warn(failure);
    throw new IOException(failure);
  }

  OBSDataBlocks.validateWriteArgs(source, offset, len);
  checkOpen();
  if (len == 0) {
    return;
  }

  final OBSDataBlocks.DataBlock target = createBlockIfNeeded();
  final int bytesTaken = target.write(source, offset, len);
  final int spaceLeft = target.remainingCapacity();
  try {
    innerWrite(source, offset, len, bytesTaken, spaceLeft);
  } catch (IOException e) {
    LOG.error(
        "Write data for key {} of bucket {} error, error message {}",
        key, fs.getBucket(),
        e.getMessage());
    throw e;
  }
}
/**
 * Decides what to do after bytes have been copied into the active block.
 *
 * <ul>
 *   <li>Not everything fitted: push the block out, then write the rest of
 *   the range through {@link #write(byte[], int, int)}.</li>
 *   <li>Everything fitted and the block is exactly full: push the block
 *   out.</li>
 *   <li>Otherwise: nothing to do.</li>
 * </ul>
 * "Push out" means {@code flushCurrentBlock()} for an appendable stream and
 * {@code uploadCurrentBlock()} (multi-part upload) otherwise.
 *
 * @param source byte array being written
 * @param offset offset in the array where this write started
 * @param len number of bytes requested for this write
 * @param written number of bytes the block accepted
 * @param remainingCapacity capacity left in the block after the write
 * @throws IOException on any problem pushing out the block or writing the
 *                     remainder
 */
private synchronized void innerWrite(final byte[] source, final int offset,
    final int len,
    final int written, final int remainingCapacity)
    throws IOException {
  if (written >= len) {
    // The whole range fitted into the block.
    if (remainingCapacity != 0) {
      return;
    }
    // The block is exactly full: trigger an upload.
    if (appendAble.get()) {
      // to write a buffer then append to obs
      LOG.debug("[Append] open stream and already write size "
              + "equal to buffer size {}, append buffer to obs.",
          blockSize);
      flushCurrentBlock();
    } else {
      // block output stream logic, multi-part upload
      uploadCurrentBlock();
    }
    return;
  }

  // The block ran out of capacity before the range was consumed:
  // trigger an upload, then process the remainder.
  LOG.debug(
      "writing more data than block has capacity -triggering upload");
  if (appendAble.get()) {
    // to write a buffer then append to obs
    LOG.debug("[Append] open stream and single write size {} "
            + "greater than buffer size {}, append buffer to obs.",
        len, blockSize);
    flushCurrentBlock();
  } else {
    // block output stream logic, multi-part upload
    uploadCurrentBlock();
  }
  // The recursion depth is bounded by the number of blocks the range
  // spans, which is small given block sizes are in the MB range.
  this.write(source, offset + written, len - written);
}
/**
 * Queue the active block as the next part of a multipart upload, creating
 * the multipart upload on first use. The active block reference is always
 * cleared afterwards, whether or not queueing succeeded.
 *
 * @throws IOException Problems opening the destination for upload or
 *                     initializing the upload.
 */
private synchronized void uploadCurrentBlock() throws IOException {
  Preconditions.checkState(hasActiveBlock(), "No active block");
  LOG.debug("Writing block # {}", blockCount);
  try {
    if (null == multiPartUpload) {
      LOG.debug("Initiating Multipart upload");
      multiPartUpload = new MultiPartUpload();
    }
    final OBSDataBlocks.DataBlock pending = getActiveBlock();
    multiPartUpload.uploadBlockAsync(pending);
  } catch (IOException failure) {
    // Remember the failure so later write/flush/close calls fail fast.
    hasException.set(true);
    LOG.error("Upload current block on ({}/{}) failed.", fs.getBucket(),
        key, failure);
    throw failure;
  } finally {
    // Drop the reference so the next write allocates a fresh block.
    clearActiveBlock();
  }
}
/**
 * Close the stream.
 *
 * <p>This will not return until the upload is complete or the attempt to
 * perform the upload has failed. Exceptions raised in this method are
 * indicative that the write has failed and data is at risk of being lost.
 *
 * <p>Calling this method more than once is harmless: only the first call
 * does any work. If an earlier write or upload already failed, the buffered
 * block is released and an {@link IOException} is thrown without attempting
 * any further upload.
 *
 * @throws IOException on any failure.
 */
@Override
public synchronized void close() throws IOException {
  if (closed.getAndSet(true)) {
    // already closed
    LOG.debug("Ignoring close() as stream is already closed");
    return;
  }
  if (hasException.get()) {
    // The stream is now closed for good, so nothing else will ever release
    // the active block. Close it here, mirroring the finally clause of
    // completeCurrentBlock(), which also passes a possibly-absent block
    // to closeAll.
    OBSCommonUtils.closeAll(getActiveBlock());
    clearActiveBlock();
    String closeWarning = String.format(
        "closed has error. bs : pre write obs[%s] has error.", key);
    LOG.warn(closeWarning);
    throw new IOException(closeWarning);
  }
  // do upload
  completeCurrentBlock();
  // clear
  clearHFlushOrSync();
  // All end of write operations, including deleting fake parent
  // directories
  writeOperationHelper.writeSuccessful(key);
}
/**
 * Store the current block: append it to the object when the stream is in
 * append mode and the object already exists, otherwise upload it with a
 * plain put.
 *
 * @throws IOException any problem in append or put object
 */
private synchronized void putObjectIfNeedAppend() throws IOException {
  // Short-circuit: the existence check only runs in append mode.
  final boolean appendToExisting = appendAble.get()
      && fs.exists(OBSCommonUtils.keyToQualifiedPath(fs, key));
  if (!appendToExisting) {
    putObject();
    return;
  }
  appendFsFile();
}
/**
 * Append the active block to the existing object and advance the tracked
 * object length by the size of the block.
 *
 * @throws IOException any problem
 */
private synchronized void appendFsFile() throws IOException {
  LOG.debug("bucket is posix, to append file. key is {}", key);
  final OBSDataBlocks.DataBlock current = getActiveBlock();
  // Disk blocks hand over a File, all other blocks an InputStream.
  final WriteFileRequest appendRequest =
      current instanceof OBSDataBlocks.DiskBlock
          ? OBSCommonUtils.newAppendFileRequest(fs, key,
              objectLen, (File) current.startUpload())
          : OBSCommonUtils.newAppendFileRequest(fs, key,
              objectLen, (InputStream) current.startUpload());
  OBSCommonUtils.appendFile(fs, appendRequest);
  objectLen += current.dataSize();
}
/**
 * Upload the current block with a single PUT request. An empty buffer still
 * results in a PUT of zero bytes, because that is what creates the entry at
 * the far end.
 *
 * @throws IOException any problem.
 */
private synchronized void putObject() throws IOException {
  LOG.debug("Executing regular upload for {}",
      writeOperationHelper.toString(key));
  final OBSDataBlocks.DataBlock current = getActiveBlock();
  clearActiveBlock();
  final int byteCount = current.dataSize();
  // Disk blocks are uploaded from their File, other blocks from a stream
  // of known length.
  final PutObjectRequest request =
      current instanceof OBSDataBlocks.DiskBlock
          ? writeOperationHelper.newPutRequest(key,
              (File) current.startUpload())
          : writeOperationHelper.newPutRequest(key,
              (InputStream) current.startUpload(), byteCount);
  request.setAcl(fs.getCannedACL());
  fs.getSchemeStatistics().incrementWriteOps(1);
  try {
    // the putObject call automatically closes the input
    // stream afterwards.
    writeOperationHelper.putObject(request);
  } finally {
    OBSCommonUtils.closeAll(current);
  }
}
@Override
public synchronized String toString() {
  // Format: OBSBlockOutputStream{<helper>, blockSize=<n>[, activeBlock=<b>]}
  final StringBuilder text = new StringBuilder("OBSBlockOutputStream{")
      .append(writeOperationHelper.toString())
      .append(", blockSize=")
      .append(blockSize);
  final OBSDataBlocks.DataBlock current = activeBlock;
  if (current != null) {
    text.append(", activeBlock=").append(current);
  }
  return text.append('}').toString();
}
/**
 * Placeholder: this method currently has no effect.
 */
public synchronized void sync() {
  // intentionally empty - still to be implemented
}
@Override
public synchronized void hflush() throws IOException {
  // hflush and hsync share one implementation.
  flushOrSync();
}
/**
 * Shared implementation of hflush and hsync. On a posix (file system)
 * bucket the buffered data is pushed to obs; on any other bucket the
 * operation is not supported, so a warning is logged and a plain
 * {@code flush()} is performed instead.
 *
 * @throws IOException io exception
 */
private synchronized void flushOrSync() throws IOException {
  checkOpen();
  if (hasException.get()) {
    final String message = String.format(
        "flushOrSync has error. bs : pre write obs[%s] has error.",
        key);
    LOG.warn(message);
    throw new IOException(message);
  }
  if (!fs.isFsBucket()) {
    LOG.warn("not posix bucket, not support hflush or hsync.");
    flush();
    return;
  }
  // upload what is buffered, then reset the per-flush state
  flushCurrentBlock();
  clearHFlushOrSync();
}
/**
 * Reset state after a flush or close: forget the multipart upload and mark
 * the stream as append-able.
 */
private synchronized void clearHFlushOrSync() {
  multiPartUpload = null;
  appendAble.set(true);
}
/**
 * Upload buffered data to obs. When a multipart upload is in progress the
 * last block (if it holds data) is queued, all parts are awaited and the
 * upload is completed; otherwise the single block is stored by put or
 * append.
 *
 * @param block    the current block; only inspected when {@code hasBlock}
 *                 is true
 * @param hasBlock whether there is an active block
 * @throws IOException io exception
 */
private synchronized void uploadWriteBlocks(
    final OBSDataBlocks.DataBlock block,
    final boolean hasBlock)
    throws IOException {
  if (multiPartUpload != null) {
    // At least one block has already been scheduled for upload.
    if (hasBlock && block.hasData()) {
      // send last part
      uploadCurrentBlock();
    }
    // Wait for the outstanding parts, then complete the operation.
    multiPartUpload.complete(multiPartUpload.waitForAllPartUploads());
  } else if (hasBlock) {
    // No data has been uploaded yet: store the single block. This must
    // happen even if it is empty, so that 0 byte files are created.
    putObjectIfNeedAppend();
  }
  LOG.debug("Upload complete for {}", writeOperationHelper.toString(key));
}
/**
 * Upload whatever is still buffered as part of closing the stream. The
 * block is closed and the active block reference cleared in all cases.
 *
 * @throws IOException if the upload fails
 */
private synchronized void completeCurrentBlock() throws IOException {
  final OBSDataBlocks.DataBlock current = getActiveBlock();
  final boolean present = hasActiveBlock();
  LOG.debug("{}: complete block #{}: current block= {}", this, blockCount,
      present ? current : "(none)");
  try {
    uploadWriteBlocks(current, present);
  } catch (IOException ioFailure) {
    LOG.error("Upload data to obs error. io exception : {}",
        ioFailure.getMessage());
    throw ioFailure;
  } catch (Exception otherFailure) {
    LOG.error("Upload data to obs error. other exception : {}",
        otherFailure.getMessage());
    throw otherFailure;
  } finally {
    OBSCommonUtils.closeAll(current);
    clearActiveBlock();
  }
}
/**
 * Upload whatever is buffered while the stream stays open. Any failure is
 * recorded in {@code hasException} before being rethrown; the block is
 * closed and the active block reference cleared in all cases.
 *
 * @throws IOException if the upload fails
 */
private synchronized void flushCurrentBlock() throws IOException {
  final OBSDataBlocks.DataBlock current = getActiveBlock();
  final boolean present = hasActiveBlock();
  LOG.debug(
      "{}: complete block #{}: current block= {}", this, blockCount,
      present ? current : "(none)");
  try {
    uploadWriteBlocks(current, present);
  } catch (IOException ioFailure) {
    LOG.error("hflush data to obs error. io exception : {}",
        ioFailure.getMessage());
    hasException.set(true);
    throw ioFailure;
  } catch (Exception otherFailure) {
    LOG.error("hflush data to obs error. other exception : {}",
        otherFailure.getMessage());
    hasException.set(true);
    throw otherFailure;
  } finally {
    OBSCommonUtils.closeAll(current);
    clearActiveBlock();
  }
}
@Override
public synchronized void hsync() throws IOException {
  // Same implementation as hflush.
  flushOrSync();
}
/**
 * State and operations of one multipart upload for this stream's key:
 * initiating it, queueing parts asynchronously, waiting for them, and
 * completing or aborting the upload.
 */
private class MultiPartUpload {
  /**
   * Upload id for multipart upload.
   */
  private final String uploadId;

  /**
   * List for async part upload future.
   */
  private final List<ListenableFuture<PartEtag>> partETagsFutures;

  /**
   * Initiate a multipart upload for the stream's key.
   *
   * @throws IOException if the upload cannot be initiated
   */
  MultiPartUpload() throws IOException {
    this.uploadId = writeOperationHelper.initiateMultiPartUpload(key);
    this.partETagsFutures = new ArrayList<>(2);
    // Placeholders are listed in the same order as the arguments.
    LOG.debug(
        "Initiated multi-part upload for {} with id '{}', the key is {}",
        writeOperationHelper,
        uploadId,
        key);
  }

  /**
   * Upload a block of data asynchronously.
   *
   * <p>The queued task closes the block when it finishes. If the part
   * upload raises an {@code ObsException}, the task records the failure in
   * {@code hasException} and yields {@code null} instead of a part etag.
   *
   * @param block block to upload
   * @throws IOException upload failure
   */
  private void uploadBlockAsync(final OBSDataBlocks.DataBlock block)
      throws IOException {
    LOG.debug("Queueing upload of {}", block);
    final int size = block.dataSize();
    // Part numbers start at 1 and follow the order of submission.
    final int currentPartNumber = partETagsFutures.size() + 1;
    final UploadPartRequest request;
    if (block instanceof OBSDataBlocks.DiskBlock) {
      request = writeOperationHelper.newUploadPartRequest(
          key,
          uploadId,
          currentPartNumber,
          size,
          (File) block.startUpload());
    } else {
      request = writeOperationHelper.newUploadPartRequest(
          key,
          uploadId,
          currentPartNumber,
          size,
          (InputStream) block.startUpload());
    }
    ListenableFuture<PartEtag> partETagFuture = executorService.submit(
        () -> {
          // this is the queued upload operation
          LOG.debug("Uploading part {} for id '{}'",
              currentPartNumber, uploadId);
          // do the upload
          PartEtag partETag = null;
          try {
            if (mockUploadPartError) {
              throw new ObsException("mock upload part error");
            }
            UploadPartResult uploadPartResult
                = OBSCommonUtils.uploadPart(fs, request);
            partETag =
                new PartEtag(uploadPartResult.getEtag(),
                    uploadPartResult.getPartNumber());
            if (LOG.isDebugEnabled()) {
              LOG.debug("Completed upload of {} to part {}",
                  block, partETag);
            }
          } catch (ObsException e) {
            // catch all exception
            hasException.set(true);
            LOG.error("UploadPart failed (ObsException). {}",
                OBSCommonUtils.translateException("UploadPart", key,
                    e).getMessage());
          } finally {
            // close the stream and block
            OBSCommonUtils.closeAll(block);
          }
          return partETag;
        });
    partETagsFutures.add(partETagFuture);
  }

  /**
   * Block awaiting all outstanding uploads to complete.
   *
   * <p>If the wait is interrupted or a part task fails, every part future
   * is cancelled and the multipart upload is aborted. The upload is also
   * aborted when any part finished without an etag, because such an upload
   * must not be completed.
   *
   * @return list of results, none of which is {@code null}
   * @throws IOException IO Problems
   */
  private List<PartEtag> waitForAllPartUploads() throws IOException {
    LOG.debug("Waiting for {} uploads to complete",
        partETagsFutures.size());
    final List<PartEtag> partETags;
    try {
      partETags = Futures.allAsList(partETagsFutures).get();
    } catch (InterruptedException ie) {
      LOG.warn("Interrupted partUpload", ie);
      cancelAllAndAbort();
      // Restore the interrupt flag only after the abort call, so callers
      // further up can still observe the interruption.
      Thread.currentThread().interrupt();
      throw new IOException(
          "Interrupted multi-part upload with id '" + uploadId
              + "' to " + key, ie);
    } catch (ExecutionException ee) {
      // there is no way of recovering so abort
      LOG.debug("While waiting for upload completion", ee);
      cancelAllAndAbort();
      throw OBSCommonUtils.extractException(
          "Multi-part upload with id '" + uploadId + "' to " + key,
          key, ee);
    }
    // A part task that hit an ObsException returns null rather than
    // failing its future. Completing the upload with such a hole would
    // either fail obscurely or commit an object that is missing data.
    if (partETags.contains(null)) {
      this.abort();
      throw new IOException(
          "Multi-part upload with id '" + uploadId + "' to " + key
              + " failed: at least one part was not uploaded");
    }
    return partETags;
  }

  /**
   * Cancel every queued part upload, then abort the multipart upload.
   */
  private void cancelAllAndAbort() {
    LOG.debug("Cancelling futures");
    for (ListenableFuture<PartEtag> future : partETagsFutures) {
      future.cancel(true);
    }
    // abort multipartupload
    this.abort();
  }

  /**
   * Complete the multipart upload from the given list of uploaded parts.
   * An {@code ObsException} raised by the completion call is translated
   * into an {@link IOException}.
   *
   * @param partETags list of partial uploads
   * @return result for completing multipart upload
   * @throws IOException on any problem
   */
  private CompleteMultipartUploadResult complete(
      final List<PartEtag> partETags) throws IOException {
    String operation = String.format(
        "Completing multi-part upload for key '%s',"
            + " id '%s' with %s partitions ",
        key, uploadId, partETags.size());
    try {
      LOG.debug(operation);
      return writeOperationHelper.completeMultipartUpload(key,
          uploadId, partETags);
    } catch (ObsException e) {
      throw OBSCommonUtils.translateException(operation, key, e);
    }
  }

  /**
   * Abort a multi-part upload. An {@code ObsException} raised by the abort
   * call is logged and not rethrown; this is expected to be run as a
   * cleanup process.
   */
  void abort() {
    String operation =
        String.format(
            "Aborting multi-part upload for '%s', id '%s'",
            writeOperationHelper, uploadId);
    try {
      LOG.debug(operation);
      writeOperationHelper.abortMultipartUpload(key, uploadId);
    } catch (ObsException e) {
      LOG.warn(
          "Unable to abort multipart upload, you may need to purge "
              + "uploaded parts",
          e);
    }
  }
}
}

View File

@ -0,0 +1,46 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.fs.obs;
import com.obs.services.ObsClient;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import java.io.IOException;
import java.net.URI;
/**
 * Factory for creating OBS client instance to be used by {@link
 * OBSFileSystem}.
 */
@InterfaceAudience.Private
@InterfaceStability.Unstable
interface OBSClientFactory {
/**
 * Creates a new {@link ObsClient} client for the given OBS file system URI.
 *
 * @param name raw input OBS file system URI
 * @return OBS client
 * @throws IOException IO problem
 */
ObsClient createObsClient(URI name) throws IOException;
}

View File

@ -0,0 +1,726 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.fs.obs;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
/**
* All constants used by {@link OBSFileSystem}.
*
* <p>Some of the strings are marked as {@code Unstable}. This means that they
* may be unsupported in future; at which point they will be marked as
* deprecated and simply ignored.
*/
@InterfaceAudience.Public
@InterfaceStability.Evolving
final class OBSConstants {
/**
* Minimum multipart size which OBS supports.
*/
static final int MULTIPART_MIN_SIZE = 5 * 1024 * 1024;
/**
* OBS access key.
*/
static final String ACCESS_KEY = "fs.obs.access.key";
/**
* OBS secret key.
*/
static final String SECRET_KEY = "fs.obs.secret.key";
/**
* OBS credentials provider.
*/
static final String OBS_CREDENTIALS_PROVIDER
= "fs.obs.credentials.provider";
/**
* OBS client security provider.
*/
static final String OBS_SECURITY_PROVIDER = "fs.obs.security.provider";
/**
* Extra set of security credentials which will be prepended to that set in
* {@code "hadoop.security.credential.provider.path"}. This extra option
* allows for per-bucket overrides.
*/
static final String OBS_SECURITY_CREDENTIAL_PROVIDER_PATH =
"fs.obs.security.credential.provider.path";
/**
* Session token for when using TemporaryOBSCredentialsProvider.
*/
static final String SESSION_TOKEN = "fs.obs.session.token";
/**
* Maximum number of simultaneous connections to obs.
*/
static final String MAXIMUM_CONNECTIONS = "fs.obs.connection.maximum";
/**
* Default value of {@link #MAXIMUM_CONNECTIONS}.
*/
static final int DEFAULT_MAXIMUM_CONNECTIONS = 1000;
/**
* Connect to obs over ssl.
*/
static final String SECURE_CONNECTIONS = "fs.obs.connection.ssl.enabled";
/**
* Default value of {@link #SECURE_CONNECTIONS}.
*/
static final boolean DEFAULT_SECURE_CONNECTIONS = false;
/**
* Use a custom endpoint.
*/
static final String ENDPOINT = "fs.obs.endpoint";
/**
* Host for connecting to OBS through proxy server.
*/
static final String PROXY_HOST = "fs.obs.proxy.host";
/**
* Port for connecting to OBS through proxy server.
*/
static final String PROXY_PORT = "fs.obs.proxy.port";
/**
* User name for connecting to OBS through proxy server.
*/
static final String PROXY_USERNAME = "fs.obs.proxy.username";
/**
* Password for connecting to OBS through proxy server.
*/
static final String PROXY_PASSWORD = "fs.obs.proxy.password";
/**
* Default port for HTTPS.
*/
static final int DEFAULT_HTTPS_PORT = 443;
/**
* Default port for HTTP.
*/
static final int DEFAULT_HTTP_PORT = 80;
/**
* Number of times we should retry errors.
*/
static final String MAX_ERROR_RETRIES = "fs.obs.attempts.maximum";
/**
* Default value of {@link #MAX_ERROR_RETRIES}.
*/
static final int DEFAULT_MAX_ERROR_RETRIES = 3;
/**
 * Time to wait for a connection to obs to be established before giving up.
 *
 * <p>NOTE(review): this was documented as seconds, but the default
 * {@link #DEFAULT_ESTABLISH_TIMEOUT} is 120000, which looks like
 * milliseconds - confirm the unit against the code that reads this key.
 */
static final String ESTABLISH_TIMEOUT
= "fs.obs.connection.establish.timeout";
/**
* Default value of {@link #ESTABLISH_TIMEOUT}.
*/
static final int DEFAULT_ESTABLISH_TIMEOUT = 120000;
/**
 * Time to wait on a connection to obs before giving up.
 *
 * <p>NOTE(review): this was documented as seconds, but the default
 * {@link #DEFAULT_SOCKET_TIMEOUT} is 120000, which looks like
 * milliseconds - confirm the unit against the code that reads this key.
 */
static final String SOCKET_TIMEOUT = "fs.obs.connection.timeout";
/**
* Default value of {@link #SOCKET_TIMEOUT}.
*/
static final int DEFAULT_SOCKET_TIMEOUT = 120000;
/**
* Socket send buffer to be used in OBS SDK.
*/
static final String SOCKET_SEND_BUFFER = "fs.obs.socket.send.buffer";
/**
* Default value of {@link #SOCKET_SEND_BUFFER}.
*/
static final int DEFAULT_SOCKET_SEND_BUFFER = 256 * 1024;
/**
* Socket receive buffer to be used in OBS SDK.
*/
static final String SOCKET_RECV_BUFFER = "fs.obs.socket.recv.buffer";
/**
* Default value of {@link #SOCKET_RECV_BUFFER}.
*/
static final int DEFAULT_SOCKET_RECV_BUFFER = 256 * 1024;
/**
* Number of records to get while paging through a directory listing.
*/
static final String MAX_PAGING_KEYS = "fs.obs.paging.maximum";
/**
* Default value of {@link #MAX_PAGING_KEYS}.
*/
static final int DEFAULT_MAX_PAGING_KEYS = 1000;
/**
* Maximum number of threads to allow in the pool used by TransferManager.
*/
static final String MAX_THREADS = "fs.obs.threads.max";
/**
* Default value of {@link #MAX_THREADS}.
*/
static final int DEFAULT_MAX_THREADS = 20;
/**
* Maximum number of tasks cached if all threads are already uploading.
*/
static final String MAX_TOTAL_TASKS = "fs.obs.max.total.tasks";
/**
* Default value of {@link #MAX_TOTAL_TASKS}.
*/
static final int DEFAULT_MAX_TOTAL_TASKS = 20;
/**
* Max number of copy threads.
*/
static final String MAX_COPY_THREADS = "fs.obs.copy.threads.max";
/**
* Default value of {@link #MAX_COPY_THREADS}.
*/
static final int DEFAULT_MAX_COPY_THREADS = 40;
/**
* Max number of delete threads.
*/
static final String MAX_DELETE_THREADS = "fs.obs.delete.threads.max";
/**
* Default value of {@link #MAX_DELETE_THREADS}.
*/
static final int DEFAULT_MAX_DELETE_THREADS = 20;
/**
* Unused option: maintained for compile-time compatibility. If set, a warning
* is logged in OBS during init.
*/
@Deprecated
static final String CORE_THREADS = "fs.obs.threads.core";
/**
* The time that an idle thread waits before terminating.
*/
static final String KEEPALIVE_TIME = "fs.obs.threads.keepalivetime";
/**
* Default value of {@link #KEEPALIVE_TIME}.
*/
static final int DEFAULT_KEEPALIVE_TIME = 60;
/**
 * Size of each multipart piece in bytes.
 */
static final String MULTIPART_SIZE = "fs.obs.multipart.size";
/**
* Default value of {@link #MULTIPART_SIZE}.
*/
static final long DEFAULT_MULTIPART_SIZE = 104857600; // 100 MB
/**
* Enable multi-object delete calls.
*/
static final String ENABLE_MULTI_DELETE = "fs.obs.multiobjectdelete.enable";
/**
* Max number of objects in one multi-object delete call. This option takes
* effect only when the option 'ENABLE_MULTI_DELETE' is set to 'true'.
*/
static final String MULTI_DELETE_MAX_NUMBER
= "fs.obs.multiobjectdelete.maximum";
/**
* Default value of {@link #MULTI_DELETE_MAX_NUMBER}.
*/
static final int DEFAULT_MULTI_DELETE_MAX_NUMBER = 1000;
/**
* Delete recursively or not.
*/
static final String MULTI_DELETE_RECURSION
= "fs.obs.multiobjectdelete.recursion";
/**
* Minimum number of objects in one multi-object delete call.
*/
static final String MULTI_DELETE_THRESHOLD
= "fs.obs.multiobjectdelete.threshold";
/**
* Default value of {@link #MULTI_DELETE_THRESHOLD}.
*/
static final int MULTI_DELETE_DEFAULT_THRESHOLD = 3;
/**
* Comma separated list of directories.
*/
static final String BUFFER_DIR = "fs.obs.buffer.dir";
/**
* Switch to the fast block-by-block upload mechanism.
*/
static final String FAST_UPLOAD = "fs.obs.fast.upload";
/**
* What buffer to use. Default is {@link #FAST_UPLOAD_BUFFER_DISK} Value:
* {@value}
*/
@InterfaceStability.Unstable
static final String FAST_UPLOAD_BUFFER = "fs.obs.fast.upload.buffer";
/**
* Buffer blocks to disk: {@value}. Capacity is limited to available disk
* space.
*/
@InterfaceStability.Unstable
static final String FAST_UPLOAD_BUFFER_DISK = "disk";
/**
 * Use an in-memory array. Fast but will run out of heap rapidly: {@value}.
 */
@InterfaceStability.Unstable
static final String FAST_UPLOAD_BUFFER_ARRAY = "array";
/**
* Use a byte buffer. May be more memory efficient than the {@link
* #FAST_UPLOAD_BUFFER_ARRAY}: {@value}.
*/
@InterfaceStability.Unstable
static final String FAST_UPLOAD_BYTEBUFFER = "bytebuffer";
/**
 * Maximum number of blocks a single output stream can have active
 * (uploading, or queued to the central FileSystem instance's pool of queued
 * operations). This stops a single stream overloading the shared thread
 * pool. {@value}
 *
 * <p>Default is {@link #DEFAULT_FAST_UPLOAD_ACTIVE_BLOCKS}
 */
@InterfaceStability.Unstable
static final String FAST_UPLOAD_ACTIVE_BLOCKS
= "fs.obs.fast.upload.active.blocks";
/**
* Limit of queued block upload operations before writes block. Value:
* {@value}
*/
@InterfaceStability.Unstable
static final int DEFAULT_FAST_UPLOAD_ACTIVE_BLOCKS = 4;
/**
* Canned acl options: Private | PublicRead | PublicReadWrite |
* AuthenticatedRead | LogDeliveryWrite | BucketOwnerRead |
* BucketOwnerFullControl.
*/
static final String CANNED_ACL = "fs.obs.acl.default";
/**
* Default value of {@link #CANNED_ACL}.
*/
static final String DEFAULT_CANNED_ACL = "";
/**
* Should we try to purge old multipart uploads when starting up.
*/
static final String PURGE_EXISTING_MULTIPART = "fs.obs.multipart.purge";
/**
* Default value of {@link #PURGE_EXISTING_MULTIPART}.
*/
static final boolean DEFAULT_PURGE_EXISTING_MULTIPART = false;
/**
* Purge any multipart uploads older than this number of seconds.
*/
static final String PURGE_EXISTING_MULTIPART_AGE
= "fs.obs.multipart.purge.age";
/**
* Default value of {@link #PURGE_EXISTING_MULTIPART_AGE}.
*/
static final long DEFAULT_PURGE_EXISTING_MULTIPART_AGE = 86400;
/**
* OBS folder suffix.
*/
static final String OBS_FOLDER_SUFFIX = "_$folder$";
/**
* Block size for
* {@link org.apache.hadoop.fs.FileSystem#getDefaultBlockSize()}.
*/
static final String FS_OBS_BLOCK_SIZE = "fs.obs.block.size";
/**
* Default value of {@link #FS_OBS_BLOCK_SIZE}.
*/
static final int DEFAULT_FS_OBS_BLOCK_SIZE = 128 * 1024 * 1024;
/**
* OBS scheme.
*/
static final String OBS_SCHEME = "obs";
/**
* Prefix for all OBS properties: {@value}.
*/
static final String FS_OBS_PREFIX = "fs.obs.";
/**
* Prefix for OBS bucket-specific properties: {@value}.
*/
static final String FS_OBS_BUCKET_PREFIX = "fs.obs.bucket.";
/**
* OBS default port.
*/
static final int OBS_DEFAULT_PORT = -1;
/**
* User agent prefix.
*/
static final String USER_AGENT_PREFIX = "fs.obs.user.agent.prefix";
/**
* Read ahead buffer size to prevent connection re-establishments.
*/
static final String READAHEAD_RANGE = "fs.obs.readahead.range";
/**
* Default value of {@link #READAHEAD_RANGE}.
*/
static final long DEFAULT_READAHEAD_RANGE = 1024 * 1024;
/**
* Flag indicating if {@link OBSInputStream#read(long, byte[], int, int)} will
* use the implementation of
* {@link org.apache.hadoop.fs.FSInputStream#read(long,
* byte[], int, int)}.
*/
static final String READ_TRANSFORM_ENABLE = "fs.obs.read.transform.enable";
/**
* OBS client factory implementation class.
*/
@InterfaceAudience.Private
@InterfaceStability.Unstable
static final String OBS_CLIENT_FACTORY_IMPL
= "fs.obs.client.factory.impl";
/**
* Default value of {@link #OBS_CLIENT_FACTORY_IMPL}.
*/
@InterfaceAudience.Private
@InterfaceStability.Unstable
static final Class<? extends OBSClientFactory>
DEFAULT_OBS_CLIENT_FACTORY_IMPL =
DefaultOBSClientFactory.class;
/**
* Maximum number of partitions in a multipart upload: {@value}.
*/
@InterfaceAudience.Private
static final int MAX_MULTIPART_COUNT = 10000;
// OBS Client configuration
/**
* Idle connection time.
*/
static final String IDLE_CONNECTION_TIME = "fs.obs.idle.connection.time";
/**
* Default value of {@link #IDLE_CONNECTION_TIME}.
*/
static final int DEFAULT_IDLE_CONNECTION_TIME = 30000;
/**
* Maximum number of idle connections.
*/
static final String MAX_IDLE_CONNECTIONS = "fs.obs.max.idle.connections";
/**
* Default value of {@link #MAX_IDLE_CONNECTIONS}.
*/
static final int DEFAULT_MAX_IDLE_CONNECTIONS = 1000;
/**
* Keep alive.
*/
static final String KEEP_ALIVE = "fs.obs.keep.alive";
/**
* Default value of {@link #KEEP_ALIVE}.
*/
static final boolean DEFAULT_KEEP_ALIVE = true;
/**
* Validate certificate.
*/
static final String VALIDATE_CERTIFICATE = "fs.obs.validate.certificate";
/**
* Default value of {@link #VALIDATE_CERTIFICATE}.
*/
static final boolean DEFAULT_VALIDATE_CERTIFICATE = false;
/**
* Verify response content type.
*/
static final String VERIFY_RESPONSE_CONTENT_TYPE
= "fs.obs.verify.response.content.type";
/**
* Default value of {@link #VERIFY_RESPONSE_CONTENT_TYPE}.
*/
static final boolean DEFAULT_VERIFY_RESPONSE_CONTENT_TYPE = true;
/**
* UploadStreamRetryBufferSize.
*/
static final String UPLOAD_STREAM_RETRY_SIZE
= "fs.obs.upload.stream.retry.buffer.size";
/**
* Default value of {@link #UPLOAD_STREAM_RETRY_SIZE}.
*/
static final int DEFAULT_UPLOAD_STREAM_RETRY_SIZE = 512 * 1024;
/**
* Read buffer size.
*/
static final String READ_BUFFER_SIZE = "fs.obs.read.buffer.size";
/**
* Default value of {@link #READ_BUFFER_SIZE}.
*/
static final int DEFAULT_READ_BUFFER_SIZE = 256 * 1024;
/**
* Write buffer size.
*/
static final String WRITE_BUFFER_SIZE = "fs.obs.write.buffer.size";
/**
* Default value of {@link #WRITE_BUFFER_SIZE}.
*/
static final int DEFAULT_WRITE_BUFFER_SIZE = 256 * 1024;
/**
* Canonical name.
*/
static final String CNAME = "fs.obs.cname";
/**
* Default value of {@link #CNAME}.
*/
static final boolean DEFAULT_CNAME = false;
/**
* Strict host name verification.
*/
static final String STRICT_HOSTNAME_VERIFICATION
= "fs.obs.strict.hostname.verification";
/**
* Default value of {@link #STRICT_HOSTNAME_VERIFICATION}.
*/
static final boolean DEFAULT_STRICT_HOSTNAME_VERIFICATION = false;
/**
* Size of object copy part pieces in bytes.
*/
static final String COPY_PART_SIZE = "fs.obs.copypart.size";
/**
* Maximum value of {@link #COPY_PART_SIZE}.
*/
static final long MAX_COPY_PART_SIZE = 5368709120L; // 5GB
/**
* Default value of {@link #COPY_PART_SIZE}.
*/
static final long DEFAULT_COPY_PART_SIZE = 104857600L; // 100MB
/**
* Maximum number of copy part threads.
*/
static final String MAX_COPY_PART_THREADS = "fs.obs.copypart.threads.max";
/**
* Default value of {@link #MAX_COPY_PART_THREADS}.
*/
static final int DEFAULT_MAX_COPY_PART_THREADS = 40;
/**
* Number of core list threads.
*/
static final String CORE_LIST_THREADS = "fs.obs.list.threads.core";
/**
* Default value of {@link #CORE_LIST_THREADS}.
*/
static final int DEFAULT_CORE_LIST_THREADS = 30;
/**
* Maximum number of list threads.
*/
static final String MAX_LIST_THREADS = "fs.obs.list.threads.max";
/**
* Default value of {@link #MAX_LIST_THREADS}.
*/
static final int DEFAULT_MAX_LIST_THREADS = 60;
/**
* Capacity of list work queue.
*/
static final String LIST_WORK_QUEUE_CAPACITY
= "fs.obs.list.workqueue.capacity";
/**
* Default value of {@link #LIST_WORK_QUEUE_CAPACITY}.
*/
static final int DEFAULT_LIST_WORK_QUEUE_CAPACITY = 1024;
/**
* List parallel factor.
*/
static final String LIST_PARALLEL_FACTOR = "fs.obs.list.parallel.factor";
/**
* Default value of {@link #LIST_PARALLEL_FACTOR}.
*/
static final int DEFAULT_LIST_PARALLEL_FACTOR = 30;
/**
* Switch for the fast delete.
*/
static final String TRASH_ENABLE = "fs.obs.trash.enable";
/**
* Enable obs content summary or not.
*/
static final String OBS_CONTENT_SUMMARY_ENABLE
= "fs.obs.content.summary.enable";
/**
* Enable obs client dfs list or not.
*/
static final String OBS_CLIENT_DFS_LIST_ENABLE
= "fs.obs.client.dfs.list.enable";
/**
* Default trash : false.
*/
static final boolean DEFAULT_TRASH = false;
/**
* The fast delete recycle directory.
*/
static final String TRASH_DIR = "fs.obs.trash.dir";
/**
* Encryption type is sse-kms or sse-c.
*/
static final String SSE_TYPE = "fs.obs.server-side-encryption-type";
/**
* Kms key id for sse-kms, while key base64 encoded content for sse-c.
*/
static final String SSE_KEY = "fs.obs.server-side-encryption-key";
/**
* Array first block size.
*/
static final String FAST_UPLOAD_BUFFER_ARRAY_FIRST_BLOCK_SIZE
= "fs.obs.fast.upload.array.first.buffer";
/**
* The fast upload buffer array first block default size.
*/
static final int FAST_UPLOAD_BUFFER_ARRAY_FIRST_BLOCK_SIZE_DEFAULT = 1024
* 1024;
/**
* Auth Type Negotiation Enable Switch.
*/
static final String SDK_AUTH_TYPE_NEGOTIATION_ENABLE
= "fs.obs.authtype.negotiation.enable";
/**
* Default value of {@link #SDK_AUTH_TYPE_NEGOTIATION_ENABLE}.
*/
static final boolean DEFAULT_SDK_AUTH_TYPE_NEGOTIATION_ENABLE = false;
/**
* Okhttp retryOnConnectionFailure switch.
*/
static final String SDK_RETRY_ON_CONNECTION_FAILURE_ENABLE
= "fs.obs.connection.retry.enable";
/**
* Default value of {@link #SDK_RETRY_ON_CONNECTION_FAILURE_ENABLE}.
*/
static final boolean DEFAULT_SDK_RETRY_ON_CONNECTION_FAILURE_ENABLE = true;
/**
 * Maximum number of SDK retries on an "unexpected end of stream" exception.
 * The default is -1, meaning no retry.
 */
static final String SDK_RETRY_TIMES_ON_UNEXPECTED_END_EXCEPTION
= "fs.obs.unexpectedend.retrytime";
/**
* Default value of {@link #SDK_RETRY_TIMES_ON_UNEXPECTED_END_EXCEPTION}.
*/
static final int DEFAULT_SDK_RETRY_TIMES_ON_UNEXPECTED_END_EXCEPTION = -1;
/**
* Maximum sdk connection retry times, default : 2000.
*/
static final int DEFAULT_MAX_SDK_CONNECTION_RETRY_TIMES = 2000;
/**
* Second to millisecond factor.
*/
static final int SEC2MILLISEC_FACTOR = 1000;
private OBSConstants() {
}
}

View File

@ -0,0 +1,92 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.fs.obs;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.Path;
/**
 * {@link FileStatus} describing an OBS file or directory.
 *
 * <p>The class is package-private because instances are not meant to be
 * created directly by users of the file system.
 */
@InterfaceAudience.Private
@InterfaceStability.Evolving
class OBSFileStatus extends FileStatus {
  /**
   * Build the status of a directory whose modification time is zero. Owner
   * and group are both set to {@code owner}.
   *
   * @param path  the path
   * @param owner the owner, also used as the group
   */
  OBSFileStatus(final Path path, final String owner) {
    this(path, 0L, owner);
  }

  /**
   * Build the status of a directory with a known modification time. Owner
   * and group are both set to {@code owner}.
   *
   * @param path             the path
   * @param modificationTime modification time
   * @param owner            the owner, also used as the group
   */
  OBSFileStatus(final Path path, final long modificationTime,
      final String owner) {
    super(0, true, 1, 0, modificationTime, path);
    setGroup(owner);
    setOwner(owner);
  }

  /**
   * Build the status of a directory with known modification and access
   * times. No permission is supplied; owner and group are both
   * {@code owner}.
   *
   * @param path             the path
   * @param modificationTime modification time
   * @param accessTime       access time
   * @param owner            the owner, also used as the group
   */
  OBSFileStatus(final Path path, final long modificationTime,
      final long accessTime,
      final String owner) {
    super(0, true, 1, 0, modificationTime, accessTime, null, owner, owner,
        path);
  }

  /**
   * Build the status of a plain file. Owner and group are both set to
   * {@code owner}.
   *
   * @param length           file length
   * @param modificationTime modification time
   * @param path             path
   * @param blockSize        block size
   * @param owner            the owner, also used as the group
   */
  OBSFileStatus(
      final long length, final long modificationTime, final Path path,
      final long blockSize,
      final String owner) {
    super(length, false, 1, blockSize, modificationTime, path);
    setGroup(owner);
    setOwner(owner);
  }
}

View File

@ -0,0 +1,744 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.fs.obs;
import com.obs.services.model.ListObjectsRequest;
import com.obs.services.model.ObjectListing;
import com.obs.services.model.ObjectMetadata;
import com.obs.services.model.ObsObject;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.io.InterruptedIOException;
import java.util.ArrayList;
import java.util.LinkedList;
import java.util.List;
import java.util.ListIterator;
import java.util.Queue;
import java.util.Stack;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.Future;
/**
* OBS depth first search listing implementation for posix bucket.
*/
class OBSFsDFSListing extends ObjectListing {
/**
* Class logger.
*/
private static final Logger LOG = LoggerFactory.getLogger(
OBSFsDFSListing.class);
/**
 * Count one more directory or file at the given level, first growing the
 * statistics list so that it contains an entry for every level up to and
 * including {@code level}.
 *
 * @param levelStatsList per-level statistics, indexed by level
 * @param level level of the entry being counted
 * @param isDir true to count a directory, false to count a file
 */
static void increaseLevelStats(final List<LevelStats> levelStatsList,
    final int level,
    final boolean isDir) {
  // Append the missing levels, if any, in ascending order.
  for (int next = levelStatsList.size(); next <= level; next++) {
    levelStatsList.add(new LevelStats(next));
  }

  final LevelStats stats = levelStatsList.get(level);
  if (isDir) {
    stats.increaseDirNum();
  } else {
    stats.increaseFileNum();
  }
}
/**
 * Produce the next batch of a depth-first listing.
 *
 * <p>Entries cached in {@code resultQueue} are consumed first, then more
 * one-level listings are issued until the batch is full or the list stack
 * is exhausted.
 *
 * @param owner the owning file system
 * @param listStack entities that still have to be listed
 * @param resultQueue entries already listed but not yet returned
 * @param marker marker returned by the previous batch, or null for the first
 *               batch; must match the head of {@code resultQueue}
 * @param maxKeyNum maximum number of entries to put into the batch
 * @param objectSummaries output list receiving the entries of this batch
 * @param levelStatsList per-level statistics, updated as entries are added
 * @return the key of the first entry of the following batch, or null when
 *         the listing is complete
 * @throws IOException if a remote listing fails
 * @throws IllegalArgumentException if the marker does not match the head of
 *                                  the result queue
 */
static String fsDFSListNextBatch(final OBSFileSystem owner,
    final Stack<ListEntity> listStack,
    final Queue<ListEntity> resultQueue,
    final String marker,
    final int maxKeyNum,
    final List<ObsObject> objectSummaries,
    final List<LevelStats> levelStatsList) throws IOException {
  // 0. when a marker is given it must name the head of the result queue
  if (marker != null) {
    if (resultQueue.isEmpty()) {
      throw new IllegalArgumentException(
          "result queue is empty, but marker is not empty: "
              + marker);
    }
    final ListEntity head = resultQueue.peek();
    if (head.getType() == ListEntityType.LIST_TAIL) {
      throw new RuntimeException(
          "cannot put list tail (" + head
              + ") into result queue");
    }
    final String headKey;
    if (head.getType() == ListEntityType.COMMON_PREFIX) {
      headKey = head.getCommonPrefix();
    } else {
      headKey = head.getObjectSummary().getObjectKey();
    }
    if (!marker.equals(headKey)) {
      throw new IllegalArgumentException("marker (" + marker
          + ") does not match with result queue peek ("
          + head + ")");
    }
  }

  // 1. drain locally cached results first
  final int locallyFetched = fetchListResultLocally(owner.getBucket(),
      resultQueue, maxKeyNum, objectSummaries, levelStatsList);

  // 2. top up the batch with one-level lists executed in parallel
  fetchListResultRemotely(owner, listStack, resultQueue, maxKeyNum,
      objectSummaries, levelStatsList, locallyFetched);

  // 3. decide whether the listing has ended
  if (resultQueue.isEmpty()) {
    if (!listStack.empty()) {
      throw new RuntimeException(
          "result queue is empty, but list stack is not empty: "
              + listStack);
    }
    return null;
  }

  final ListEntity upcoming = resultQueue.peek();
  if (upcoming.getType() == ListEntityType.LIST_TAIL) {
    throw new RuntimeException(
        "cannot put list tail (" + upcoming
            + ") into result queue");
  }
  if (upcoming.getType() == ListEntityType.COMMON_PREFIX) {
    return upcoming.getCommonPrefix();
  }
  return upcoming.getObjectSummary().getObjectKey();
}
/**
 * Keep issuing rounds of one-level listings until the list stack is empty,
 * or the batch is full and at least one entry is cached for the next batch.
 *
 * @param owner the owning file system
 * @param listStack entities that still have to be listed
 * @param resultQueue receives entries that do not fit into this batch
 * @param maxKeyNum maximum number of entries in the batch
 * @param objectSummaries output list receiving the entries of this batch
 * @param levelStatsList per-level statistics, updated as entries are added
 * @param resultNum number of entries already in the batch
 * @throws IOException if a remote listing fails
 */
static void fetchListResultRemotely(final OBSFileSystem owner,
    final Stack<ListEntity> listStack,
    final Queue<ListEntity> resultQueue, final int maxKeyNum,
    final List<ObsObject> objectSummaries,
    final List<LevelStats> levelStatsList,
    final int resultNum) throws IOException {
  int collected = resultNum;
  while (!listStack.empty()
      && (collected < maxKeyNum || resultQueue.isEmpty())) {
    final List<ListObjectsRequest> requests = new ArrayList<>();
    final List<Future<ObjectListing>> futures = new ArrayList<>();
    final List<Integer> requestLevels = new ArrayList<>();
    final List<ObjectListing> listings = new ArrayList<>();

    // a. submit a round of one-level list tasks in parallel
    submitOneLevelListTasks(owner, listStack, maxKeyNum,
        requests, futures, requestLevels);

    // b. wait for every task of the round
    waitForOneLevelListTasksFinished(requests, futures, listings);

    // c. move sub-directories and files into the batch; whatever exceeds
    // maxKeyNum is cached in resultQueue for the next batch. Unlike a
    // standard DFS, sub-directories go straight into the result list so
    // that they do not need extra cache space.
    collected = handleOneLevelListTaskResult(resultQueue, maxKeyNum,
        objectSummaries, levelStatsList, collected,
        requests, requestLevels, listings);

    // d. push sub-directories and the list tail onto the stack in reversed
    // order, so that later pops return them in listing order
    addNewListStackEntities(listStack, requests, requestLevels, listings);
  }
}
/**
 * Distribute the results of one round of one-level listings.
 *
 * <p>Common prefixes and objects are appended to {@code objectSummaries}
 * while the batch holds fewer than {@code maxKeyNum} entries; everything
 * beyond that is queued in {@code resultQueue} for a later batch. An entry
 * equal to the prefix of its own request is skipped.
 *
 * @param resultQueue receives entries that do not fit into this batch
 * @param maxKeyNum maximum number of entries in the batch
 * @param objectSummaries output list receiving the entries of this batch
 * @param levelStatsList per-level statistics, updated for batch entries
 * @param resultNum number of entries already in the batch
 * @param oneLevelListRequests requests of the round, parallel to the listings
 * @param levels level of each request, parallel to the listings
 * @param oneLevelObjectListings listings returned by the round
 * @return the number of entries in the batch after processing
 */
@SuppressWarnings("checkstyle:parameternumber")
static int handleOneLevelListTaskResult(final Queue<ListEntity> resultQueue,
    final int maxKeyNum,
    final List<ObsObject> objectSummaries,
    final List<LevelStats> levelStatsList,
    final int resultNum,
    final List<ListObjectsRequest> oneLevelListRequests,
    final List<Integer> levels,
    final List<ObjectListing> oneLevelObjectListings) {
  int newResultNum = resultNum;
  for (int i = 0; i < oneLevelObjectListings.size(); i++) {
    final ListObjectsRequest request = oneLevelListRequests.get(i);
    // Parameterized logging: no message string is assembled unless debug
    // output is actually enabled.
    LOG.debug("one level listing with prefix={}, marker={}",
        request.getPrefix(),
        request.getMarker() != null ? request.getMarker() : "");

    final ObjectListing oneLevelObjectListing
        = oneLevelObjectListings.get(i);
    LOG.debug("# of CommonPrefixes/Objects: {}/{}",
        oneLevelObjectListing.getCommonPrefixes().size(),
        oneLevelObjectListing.getObjects().size());

    if (oneLevelObjectListing.getCommonPrefixes().isEmpty()
        && oneLevelObjectListing.getObjects().isEmpty()) {
      continue;
    }

    for (String commonPrefix
        : oneLevelObjectListing.getCommonPrefixes()) {
      if (commonPrefix.equals(request.getPrefix())) {
        // skip prefix itself
        continue;
      }

      LOG.debug("common prefix: {}", commonPrefix);
      if (newResultNum < maxKeyNum) {
        addCommonPrefixIntoObjectList(
            request.getBucketName(),
            objectSummaries,
            commonPrefix);
        increaseLevelStats(levelStatsList, levels.get(i), true);
        newResultNum++;
      } else {
        // batch is full: keep the entry for the next batch
        resultQueue.add(
            new ListEntity(commonPrefix, levels.get(i)));
      }
    }

    for (ObsObject obj : oneLevelObjectListing.getObjects()) {
      if (obj.getObjectKey().equals(request.getPrefix())) {
        // skip prefix itself
        continue;
      }

      LOG.debug("object: {}, size: {}", obj.getObjectKey(),
          obj.getMetadata().getContentLength());
      if (newResultNum < maxKeyNum) {
        objectSummaries.add(obj);
        // a key ending with "/" is counted as a directory
        increaseLevelStats(levelStatsList, levels.get(i),
            obj.getObjectKey().endsWith("/"));
        newResultNum++;
      } else {
        // batch is full: keep the entry for the next batch
        resultQueue.add(new ListEntity(obj, levels.get(i)));
      }
    }
  }
  return newResultNum;
}
/**
 * Wait for every submitted one-level list task and collect its listing.
 *
 * <p>If the wait is interrupted or a task fails, all futures of the round
 * are cancelled before the exception is propagated. On interruption the
 * thread's interrupt status is restored and the original
 * {@link InterruptedException} is kept as the cause.
 *
 * @param oneLevelListRequests requests of the round, parallel to the futures
 * @param oneLevelListFutures futures of the submitted tasks
 * @param oneLevelObjectListings output list receiving the listings in order
 * @throws InterruptedIOException if the calling thread is interrupted
 * @throws IOException if a list task failed
 */
static void waitForOneLevelListTasksFinished(
    final List<ListObjectsRequest> oneLevelListRequests,
    final List<Future<ObjectListing>> oneLevelListFutures,
    final List<ObjectListing> oneLevelObjectListings)
    throws IOException {
  for (int i = 0; i < oneLevelListFutures.size(); i++) {
    try {
      oneLevelObjectListings.add(oneLevelListFutures.get(i).get());
    } catch (InterruptedException e) {
      final ListObjectsRequest request = oneLevelListRequests.get(i);
      final String message
          = "Interrupted while listing using DFS, prefix="
          + request.getPrefix() + ", marker="
          + markerOrEmpty(request);
      LOG.warn(message);
      // Do not leave list tasks running once the caller has given up.
      cancelListTasks(oneLevelListFutures);
      // Restore the flag so that callers further up can observe it.
      Thread.currentThread().interrupt();
      final InterruptedIOException interrupted
          = new InterruptedIOException(message);
      interrupted.initCause(e);
      throw interrupted;
    } catch (ExecutionException e) {
      final ListObjectsRequest request = oneLevelListRequests.get(i);
      LOG.error("Exception while listing using DFS, prefix="
              + request.getPrefix() + ", marker="
              + markerOrEmpty(request),
          e);
      cancelListTasks(oneLevelListFutures);
      throw OBSCommonUtils.extractException(
          "Listing using DFS with exception, marker="
              + markerOrEmpty(request),
          request.getPrefix(), e);
    }
  }
}

/**
 * Return the marker of a request, or an empty string when it has none.
 */
private static String markerOrEmpty(final ListObjectsRequest request) {
  return request.getMarker() != null ? request.getMarker() : "";
}

/**
 * Cancel every future of a round, interrupting tasks that are running.
 */
private static void cancelListTasks(
    final List<Future<ObjectListing>> futures) {
  for (Future<ObjectListing> future : futures) {
    future.cancel(true);
  }
}
/**
 * Pop entities from the list stack and submit one list task per entity,
 * up to the parallel factor of the file system.
 *
 * <p>A common prefix or directory object starts a fresh listing one level
 * deeper. A list tail continues a truncated listing at its own level (when
 * it carries a next marker) and always ends the round, so that one round
 * never mixes tasks added after a tail.
 *
 * @param owner the owning file system
 * @param listStack entities that still have to be listed
 * @param maxKeyNum maximum number of keys requested per listing
 * @param oneLevelListRequests output list receiving the created requests
 * @param oneLevelListFutures output list receiving the task futures
 * @param levels output list receiving the level of each request
 */
static void submitOneLevelListTasks(final OBSFileSystem owner,
    final Stack<ListEntity> listStack, final int maxKeyNum,
    final List<ListObjectsRequest> oneLevelListRequests,
    final List<Future<ObjectListing>> oneLevelListFutures,
    final List<Integer> levels) {
  int submitted = 0;
  while (submitted < owner.getListParallelFactor() && !listStack.empty()) {
    final ListEntity entity = listStack.pop();

    if (entity.getType() != ListEntityType.LIST_TAIL) {
      // Directory-like entity: list its children from the beginning.
      final String childPrefix;
      if (entity.getType() == ListEntityType.COMMON_PREFIX) {
        childPrefix = entity.getCommonPrefix();
      } else {
        childPrefix = entity.getObjectSummary().getObjectKey();
      }
      final ListObjectsRequest firstPage = OBSCommonUtils
          .createListObjectsRequest(owner, childPrefix, "/", maxKeyNum);
      oneLevelListRequests.add(firstPage);
      oneLevelListFutures.add(owner.getBoundedListThreadPool()
          .submit(() -> OBSCommonUtils.commonListObjects(owner,
              firstPage)));
      levels.add(entity.getLevel() + 1);
      submitted++;
      continue;
    }

    // List tail: resume the truncated listing, if there is more to read.
    if (entity.getNextMarker() != null) {
      final ListObjectsRequest nextPage = new ListObjectsRequest();
      nextPage.setBucketName(owner.getBucket());
      nextPage.setPrefix(entity.getPrefix());
      nextPage.setMarker(entity.getNextMarker());
      nextPage.setMaxKeys(Math.min(maxKeyNum, owner.getMaxKeys()));
      nextPage.setDelimiter("/");
      oneLevelListRequests.add(nextPage);
      oneLevelListFutures.add(owner.getBoundedListThreadPool()
          .submit(() -> OBSCommonUtils.commonContinueListObjects(
              owner, nextPage)));
      levels.add(entity.getLevel());
    }
    // avoid adding list tasks in different levels later
    break;
  }
}
/**
* Push the follow-up work of one round of one-level listings onto the list
* stack.
*
* <p>Listings are visited from last to first and, within a listing, entries
* are pushed in reverse, so that later pops return them in listing order.
* For every non-empty listing a list tail is pushed first, followed by its
* common prefixes and then its objects whose key ends with "/".
*
* @param listStack stack receiving the new entities
* @param oneLevelListRequests requests of the round, parallel to the listings
* @param levels level of each request, parallel to the listings
* @param oneLevelObjectListings listings returned by the round
*/
static void addNewListStackEntities(final Stack<ListEntity> listStack,
final List<ListObjectsRequest> oneLevelListRequests,
final List<Integer> levels,
final List<ObjectListing> oneLevelObjectListings) {
for (int i = oneLevelObjectListings.size() - 1; i >= 0; i--) {
ObjectListing oneLevelObjectListing = oneLevelObjectListings.get(i);
// a listing without prefixes and objects contributes nothing
if (oneLevelObjectListing.getCommonPrefixes().isEmpty()
&& oneLevelObjectListing.getObjects()
.isEmpty()) {
continue;
}
// list tail: pushed first so it is popped after the entries pushed below;
// it carries the next marker only when the listing was truncated
listStack.push(new ListEntity(oneLevelObjectListing.getPrefix(),
oneLevelObjectListing.isTruncated()
? oneLevelObjectListing.getNextMarker()
: null,
levels.get(i)));
// common prefixes, walked backwards from the end of the list
ListIterator<String> commonPrefixListIterator
= oneLevelObjectListing.getCommonPrefixes()
.listIterator(oneLevelObjectListing.getCommonPrefixes().size());
while (commonPrefixListIterator.hasPrevious()) {
String commonPrefix = commonPrefixListIterator.previous();
if (commonPrefix.equals(
oneLevelListRequests.get(i).getPrefix())) {
// skip prefix itself
continue;
}
listStack.push(new ListEntity(commonPrefix, levels.get(i)));
}
// objects, walked backwards; only keys ending with "/" are pushed
ListIterator<ObsObject> objectSummaryListIterator
= oneLevelObjectListing.getObjects()
.listIterator(oneLevelObjectListing.getObjects().size());
while (objectSummaryListIterator.hasPrevious()) {
ObsObject objectSummary = objectSummaryListIterator.previous();
if (objectSummary.getObjectKey()
.equals(oneLevelListRequests.get(i).getPrefix())) {
// skip prefix itself
continue;
}
if (objectSummary.getObjectKey().endsWith("/")) {
listStack.push(
new ListEntity(objectSummary, levels.get(i)));
}
}
}
}
/**
 * Move cached entries from the result queue into the current batch until
 * the queue is empty or the batch holds {@code maxKeyNum} entries.
 *
 * @param bucketName bucket name used for entries built from common prefixes
 * @param resultQueue cached entries of earlier listings
 * @param maxKeyNum maximum number of entries in the batch
 * @param objectSummaries output list receiving the entries of this batch
 * @param levelStatsList per-level statistics, updated as entries are added
 * @return the number of entries moved into the batch
 */
static int fetchListResultLocally(final String bucketName,
    final Queue<ListEntity> resultQueue, final int maxKeyNum,
    final List<ObsObject> objectSummaries,
    final List<LevelStats> levelStatsList) {
  int taken = 0;
  while (taken < maxKeyNum && !resultQueue.isEmpty()) {
    final ListEntity entity = resultQueue.poll();
    final ListEntityType entityType = entity.getType();
    if (entityType == ListEntityType.LIST_TAIL) {
      throw new RuntimeException("cannot put list tail (" + entity
          + ") into result queue");
    }

    final boolean isDir;
    if (entityType == ListEntityType.COMMON_PREFIX) {
      addCommonPrefixIntoObjectList(bucketName, objectSummaries,
          entity.getCommonPrefix());
      isDir = true;
    } else {
      final ObsObject summary = entity.getObjectSummary();
      objectSummaries.add(summary);
      // a key ending with "/" is counted as a directory
      isDir = summary.getObjectKey().endsWith("/");
    }
    increaseLevelStats(levelStatsList, entity.getLevel(), isDir);
    taken++;
  }
  return taken;
}
/**
 * Append a zero-length object representing a common prefix to the list.
 *
 * @param bucketName bucket name set on the new object
 * @param objectSummaries list receiving the new object
 * @param commonPrefix common prefix used as the object key
 */
static void addCommonPrefixIntoObjectList(final String bucketName,
    final List<ObsObject> objectSummaries,
    final String commonPrefix) {
  final ObjectMetadata emptyContent = new ObjectMetadata();
  emptyContent.setContentLength(0L);

  final ObsObject prefixObject = new ObsObject();
  prefixObject.setBucketName(bucketName);
  prefixObject.setObjectKey(commonPrefix);
  prefixObject.setMetadata(emptyContent);

  objectSummaries.add(prefixObject);
}
/**
 * Start a depth-first listing and return its first batch.
 *
 * @param owner the owning file system
 * @param request the list request; it must carry neither a delimiter nor a
 *                marker
 * @return the first batch together with the state needed to continue
 * @throws IOException if a remote listing fails
 * @throws IllegalArgumentException if the request has a delimiter or marker
 */
static OBSFsDFSListing fsDFSListObjects(final OBSFileSystem owner,
    final ListObjectsRequest request) throws IOException {
  List<ObsObject> objectSummaries = new ArrayList<>();
  List<String> commonPrefixes = new ArrayList<>();
  String bucketName = owner.getBucket();
  String prefix = request.getPrefix();
  int maxKeyNum = request.getMaxKeys();
  if (request.getDelimiter() != null) {
    throw new IllegalArgumentException(
        "illegal delimiter: " + request.getDelimiter());
  }
  if (request.getMarker() != null) {
    throw new IllegalArgumentException(
        "illegal marker: " + request.getMarker());
  }

  Stack<ListEntity> listStack = new Stack<>();
  Queue<ListEntity> resultQueue = new LinkedList<>();
  List<LevelStats> levelStatsList = new ArrayList<>();

  // the listed prefix itself is level 0 and is counted as a directory
  listStack.push(new ListEntity(prefix, 0));
  increaseLevelStats(levelStatsList, 0, true);

  String nextMarker = fsDFSListNextBatch(owner, listStack, resultQueue,
      null, maxKeyNum, objectSummaries,
      levelStatsList);

  if (nextMarker == null) {
    // listing finished: report the accumulated statistics
    logLevelStats(bucketName, prefix, levelStatsList);
  }

  return new OBSFsDFSListing(request,
      objectSummaries,
      commonPrefixes,
      nextMarker,
      listStack,
      resultQueue,
      levelStatsList);
}

/**
 * Continue a depth-first listing and return its next batch.
 *
 * @param owner the owning file system
 * @param obsFsDFSListing the previous batch; its list stack, result queue
 *                        and statistics are reused and updated
 * @return the next batch together with the state needed to continue
 * @throws IOException if a remote listing fails
 * @throws IllegalArgumentException if the previous listing has a delimiter
 */
static OBSFsDFSListing fsDFSContinueListObjects(final OBSFileSystem owner,
    final OBSFsDFSListing obsFsDFSListing)
    throws IOException {
  List<ObsObject> objectSummaries = new ArrayList<>();
  List<String> commonPrefixes = new ArrayList<>();
  String bucketName = owner.getBucket();
  String prefix = obsFsDFSListing.getPrefix();
  String marker = obsFsDFSListing.getNextMarker();
  int maxKeyNum = obsFsDFSListing.getMaxKeys();
  if (obsFsDFSListing.getDelimiter() != null) {
    throw new IllegalArgumentException(
        "illegal delimiter: " + obsFsDFSListing.getDelimiter());
  }

  Stack<ListEntity> listStack = obsFsDFSListing.getListStack();
  Queue<ListEntity> resultQueue = obsFsDFSListing.getResultQueue();
  List<LevelStats> levelStatsList = obsFsDFSListing.getLevelStatsList();

  String nextMarker = fsDFSListNextBatch(owner, listStack, resultQueue,
      marker, maxKeyNum, objectSummaries,
      levelStatsList);

  if (nextMarker == null) {
    // listing finished: report the accumulated statistics
    logLevelStats(bucketName, prefix, levelStatsList);
  }

  return new OBSFsDFSListing(obsFsDFSListing,
      objectSummaries,
      commonPrefixes,
      nextMarker,
      listStack,
      resultQueue,
      levelStatsList);
}

/**
 * Log the per-level directory and file counts at debug level. The message
 * is only assembled when debug logging is enabled.
 */
private static void logLevelStats(final String bucketName,
    final String prefix, final List<LevelStats> levelStatsList) {
  if (!LOG.isDebugEnabled()) {
    return;
  }
  StringBuilder levelStatsStringBuilder = new StringBuilder();
  levelStatsStringBuilder.append("bucketName=").append(bucketName)
      .append(", prefix=").append(prefix).append(": ");
  for (LevelStats levelStats : levelStatsList) {
    levelStatsStringBuilder.append("level=")
        .append(levelStats.getLevel())
        .append(", dirNum=")
        .append(levelStats.getDirNum())
        .append(", fileNum=")
        .append(levelStats.getFileNum())
        .append("; ");
  }
  LOG.debug("[list level statistics info] {}", levelStatsStringBuilder);
}
/**
* Kind of a {@link ListEntity}; it tells which fields of the entity are set.
*/
enum ListEntityType {
/**
* Entity built from a common prefix string.
*/
COMMON_PREFIX,
/**
* Entity built from an {@link ObsObject}.
*/
OBJECT_SUMMARY,
/**
* Entity recording the prefix and next marker of a one-level listing. It
* is only valid on the list stack; finding one in the result queue is
* treated as an error.
*/
LIST_TAIL
}
/**
 * One element of the list stack or result queue used by the depth-first
 * listing. Which of the payload fields is set depends on the type; the
 * others stay null.
 */
static class ListEntity {
  /**
   * List entity type.
   */
  private final ListEntityType type;

  /**
   * Entity level.
   */
  private final int level;

  /**
   * Payload of a COMMON_PREFIX entity.
   */
  private final String commonPrefix;

  /**
   * Payload of an OBJECT_SUMMARY entity.
   */
  private final ObsObject objectSummary;

  /**
   * Listed prefix of a LIST_TAIL entity.
   */
  private final String prefix;

  /**
   * Next marker of a LIST_TAIL entity.
   */
  private final String nextMarker;

  /**
   * Create a COMMON_PREFIX entity.
   */
  ListEntity(final String comPrefix, final int entityLevel) {
    this.type = ListEntityType.COMMON_PREFIX;
    this.level = entityLevel;
    this.commonPrefix = comPrefix;
    this.objectSummary = null;
    this.prefix = null;
    this.nextMarker = null;
  }

  /**
   * Create an OBJECT_SUMMARY entity.
   */
  ListEntity(final ObsObject summary, final int entityLevel) {
    this.type = ListEntityType.OBJECT_SUMMARY;
    this.level = entityLevel;
    this.commonPrefix = null;
    this.objectSummary = summary;
    this.prefix = null;
    this.nextMarker = null;
  }

  /**
   * Create a LIST_TAIL entity.
   */
  ListEntity(final String pf, final String nextMk,
      final int entityLevel) {
    this.type = ListEntityType.LIST_TAIL;
    this.level = entityLevel;
    this.commonPrefix = null;
    this.objectSummary = null;
    this.prefix = pf;
    this.nextMarker = nextMk;
  }

  ListEntityType getType() {
    return type;
  }

  int getLevel() {
    return level;
  }

  String getCommonPrefix() {
    return commonPrefix;
  }

  ObsObject getObjectSummary() {
    return objectSummary;
  }

  public String getPrefix() {
    return prefix;
  }

  String getNextMarker() {
    return nextMarker;
  }

  /**
   * Render all fields; unset (null) payload fields are shown as empty.
   */
  @Override
  public String toString() {
    final StringBuilder text = new StringBuilder();
    text.append("type: ").append(type);
    text.append(", commonPrefix: ").append(emptyIfNull(commonPrefix));
    text.append(", objectSummary: ").append(emptyIfNull(objectSummary));
    text.append(", prefix: ").append(emptyIfNull(prefix));
    text.append(", nextMarker: ").append(emptyIfNull(nextMarker));
    return text.toString();
  }

  /**
   * Substitute an empty string for a null value.
   */
  private static Object emptyIfNull(final Object value) {
    if (value == null) {
      return "";
    }
    return value;
  }
}
/**
 * Directory and file counters for one level of a depth-first listing.
 */
static class LevelStats {
  /**
   * Entity level.
   */
  private final int level;

  /**
   * Number of directories counted so far; starts at zero.
   */
  private long dirNum;

  /**
   * Number of files counted so far; starts at zero.
   */
  private long fileNum;

  LevelStats(final int entityLevel) {
    this.level = entityLevel;
  }

  void increaseDirNum() {
    dirNum += 1;
  }

  void increaseFileNum() {
    fileNum += 1;
  }

  int getLevel() {
    return level;
  }

  long getDirNum() {
    return dirNum;
  }

  long getFileNum() {
    return fileNum;
  }
}
/**
* Stack of entity list.
*/
private Stack<ListEntity> listStack;
/**
* Queue of entity list.
*/
private Queue<ListEntity> resultQueue;
/**
* List of levelStats.
*/
private List<LevelStats> levelStatsList;
/**
* Create a listing batch from the original list request.
*
* <p>Bucket name, prefix and max keys are taken from the request; three of
* the superclass constructor arguments are passed as null.
* NOTE(review): the positional meaning of the superclass constructor
* arguments is not visible here; presumably {@code nextMarker != null} is
* the truncated flag - confirm against ObjectListing.
*
* @param request the original list request
* @param objectSummaries entries of this batch
* @param commonPrefixes common prefixes of this batch
* @param nextMarker marker of the next batch, or null if there is none
* @param listEntityStack list stack to keep for continuing the listing
* @param listEntityQueue result queue to keep for continuing the listing
* @param listLevelStats per-level statistics collected so far
*/
OBSFsDFSListing(final ListObjectsRequest request,
final List<ObsObject> objectSummaries,
final List<String> commonPrefixes,
final String nextMarker,
final Stack<ListEntity> listEntityStack,
final Queue<ListEntity> listEntityQueue,
final List<LevelStats> listLevelStats) {
super(objectSummaries,
commonPrefixes,
request.getBucketName(),
nextMarker != null,
request.getPrefix(),
null,
request.getMaxKeys(),
null,
nextMarker,
null);
this.listStack = listEntityStack;
this.resultQueue = listEntityQueue;
this.levelStatsList = listLevelStats;
}
/**
* Create a listing batch that continues a previous batch.
*
* <p>Bucket name, prefix and max keys are taken from the previous batch,
* whose next marker is also handed to the superclass constructor.
* NOTE(review): the positional meaning of the superclass constructor
* arguments is not visible here; presumably the previous next marker
* becomes this batch's marker - confirm against ObjectListing.
*
* @param obsFsDFSListing the previous batch
* @param objectSummaries entries of this batch
* @param commonPrefixes common prefixes of this batch
* @param nextMarker marker of the next batch, or null if there is none
* @param listEntityStack list stack to keep for continuing the listing
* @param listEntityQueue result queue to keep for continuing the listing
* @param listLevelStats per-level statistics collected so far
*/
OBSFsDFSListing(final OBSFsDFSListing obsFsDFSListing,
final List<ObsObject> objectSummaries,
final List<String> commonPrefixes,
final String nextMarker,
final Stack<ListEntity> listEntityStack,
final Queue<ListEntity> listEntityQueue,
final List<LevelStats> listLevelStats) {
super(objectSummaries,
commonPrefixes,
obsFsDFSListing.getBucketName(),
nextMarker != null,
obsFsDFSListing.getPrefix(),
obsFsDFSListing.getNextMarker(),
obsFsDFSListing.getMaxKeys(),
null,
nextMarker,
null);
this.listStack = listEntityStack;
this.resultQueue = listEntityQueue;
this.levelStatsList = listLevelStats;
}
/**
* @return the list stack stored in this batch (the same instance, not a
* copy)
*/
Stack<ListEntity> getListStack() {
return listStack;
}
/**
* @return the result queue stored in this batch (the same instance, not a
* copy)
*/
Queue<ListEntity> getResultQueue() {
return resultQueue;
}
/**
* @return the per-level statistics stored in this batch (the same instance,
* not a copy)
*/
List<LevelStats> getLevelStatsList() {
return levelStatsList;
}
}

View File

@ -0,0 +1,54 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.fs.obs;
import org.apache.hadoop.util.Preconditions;
import com.obs.services.exception.ObsException;
import java.io.IOException;
/**
 * IOException equivalent to {@link ObsException}.
 *
 * <p>The wrapped {@link ObsException} is always available through
 * {@link #getCause()}, and the message combines the failed operation with
 * the error message of the cause.
 */
class OBSIOException extends IOException {
  private static final long serialVersionUID = -1582681108285856259L;

  /**
   * Operation message.
   */
  private final String operation;

  /**
   * Wrap an {@link ObsException}.
   *
   * @param operationMsg description of the operation that failed; checked
   *                     to be non-null
   * @param cause the exception to wrap; checked to be non-null
   */
  OBSIOException(final String operationMsg, final ObsException cause) {
    super(cause);
    Preconditions.checkArgument(operationMsg != null,
        "Null 'operation' argument");
    Preconditions.checkArgument(cause != null, "Null 'cause' argument");
    this.operation = operationMsg;
  }

  /**
   * Return the wrapped exception with its specific type.
   *
   * @return the {@link ObsException} passed to the constructor
   */
  @Override
  public ObsException getCause() {
    return (ObsException) super.getCause();
  }

  /**
   * Build the message from the operation, the error message of the cause
   * and the message of the superclass.
   *
   * @return the combined message
   */
  @Override
  public String getMessage() {
    return operation + ": " + getCause().getErrorMessage()
        + ", detailMessage: " + super.getMessage();
  }
}

View File

@ -0,0 +1,656 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.fs.obs;
import com.obs.services.exception.ObsException;
import com.obs.services.model.ListObjectsRequest;
import com.obs.services.model.ObjectListing;
import com.obs.services.model.ObsObject;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;
import org.apache.hadoop.fs.RemoteIterator;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.ListIterator;
import java.util.NoSuchElementException;
/**
* OBS listing implementation.
*/
class OBSListing {
/**
* A Path filter which accepts all filenames. Its string form is
* {@code "ACCEPT_ALL"}.
*/
static final PathFilter ACCEPT_ALL =
new PathFilter() {
// every path passes the filter
@Override
public boolean accept(final Path file) {
return true;
}
@Override
public String toString() {
return "ACCEPT_ALL";
}
};
/**
* Class logger.
*/
private static final Logger LOG = LoggerFactory.getLogger(OBSListing.class);
/**
* OBS File System instance.
*/
private final OBSFileSystem owner;
/**
* Create a listing helper bound to a file system.
*
* @param ownerFS the OBS file system stored as this helper's owner
*/
OBSListing(final OBSFileSystem ownerFS) {
this.owner = ownerFS;
}
/**
 * Build a {@link FileStatus} iterator for a path, driven by the given list
 * objects request.
 *
 * @param listPath path of the listing
 * @param request initial request to make
 * @param filter the filter on which paths to accept
 * @param acceptor the class/predicate to decide which entries to accept in
 *                 the listing based on the full file status
 * @return the iterator
 * @throws IOException IO Problems
 */
FileStatusListingIterator createFileStatusListingIterator(
    final Path listPath,
    final ListObjectsRequest request,
    final PathFilter filter,
    final FileStatusAcceptor acceptor)
    throws IOException {
  // The object listing iterator is the raw source that the status
  // iterator filters and converts.
  final ObjectListingIterator objectListings
      = new ObjectListingIterator(listPath, request);
  return new FileStatusListingIterator(objectListings, filter, acceptor);
}
/**
* Create a located status iterator over a file status iterator.
*
* @param statusIterator an iterator over the remote status entries; it is
* wrapped, not copied
* @return a new {@code LocatedFileStatusIterator} wrapping
* {@code statusIterator}
*/
LocatedFileStatusIterator createLocatedFileStatusIterator(
final RemoteIterator<FileStatus> statusIterator) {
return new LocatedFileStatusIterator(statusIterator);
}
/**
* Interface to implement by the logic deciding whether to accept a summary
* entry or path as a valid file or directory.
*/
interface FileStatusAcceptor {
/**
* Predicate to decide whether or not to accept a summary entry.
*
* @param keyPath qualified path to the entry
* @param summary summary entry
* @return true if the entry is accepted (i.e. that a status entry should be
* generated).
*/
boolean accept(Path keyPath, ObsObject summary);
/**
* Predicate to decide whether or not to accept a prefix.
*
* @param keyPath qualified path to the entry
* @param commonPrefix the prefix
* @return true if the entry is accepted (i.e. that a status entry should be
* generated).
*/
boolean accept(Path keyPath, String commonPrefix);
}
/**
 * A remote iterator which yields at most one {@code LocatedFileStatus}.
 *
 * <p>If the status value is null, the iterator declares that it has no
 * data. This iterator is used to handle
 * {@link OBSFileSystem#listStatus(Path)} calls where the path handed in
 * refers to a file, not a directory: this is the iterator returned.
 */
static final class SingleStatusRemoteIterator
    implements RemoteIterator<LocatedFileStatus> {

  /**
   * The status still to be returned; null once it has been handed out, or
   * from the start when the iterator is empty.
   */
  private LocatedFileStatus status;

  /**
   * Constructor.
   *
   * @param locatedFileStatus status value: may be null, in which case the
   *                          iterator is empty.
   */
  SingleStatusRemoteIterator(final LocatedFileStatus locatedFileStatus) {
    this.status = locatedFileStatus;
  }

  /**
   * {@inheritDoc}
   *
   * @return true if there is a file status to return: this is always false
   *         after the status has been retrieved, and may be false from the
   *         start.
   */
  @Override
  public boolean hasNext() {
    return status != null;
  }

  /**
   * {@inheritDoc}
   *
   * @return the non-null status element passed in when the instance was
   *         constructed, if it has not already been retrieved.
   * @throws NoSuchElementException if the status has already been retrieved,
   *                                or a null {@link LocatedFileStatus} was
   *                                passed to the constructor.
   */
  @Override
  public LocatedFileStatus next() {
    if (!hasNext()) {
      throw new NoSuchElementException();
    }
    final LocatedFileStatus result = this.status;
    // hand the status out exactly once
    this.status = null;
    return result;
  }
}
/**
 * Accept only plain files: the base path, OBS pseudo directory markers,
 * objects that represent directories and all common prefixes are rejected.
 */
static class AcceptFilesOnly implements FileStatusAcceptor {
  /**
   * The base path of the listing, which is never accepted itself.
   */
  private final Path qualifiedPath;

  AcceptFilesOnly(final Path path) {
    this.qualifiedPath = path;
  }

  /**
   * Reject a summary entry if the key path is the qualified Path, if its
   * key ends with the OBS folder suffix, or if it represents a directory.
   *
   * @param keyPath key path of the entry
   * @param summary summary entry
   * @return true if the entry is accepted (i.e. that a status entry should
   *         be generated).
   */
  @Override
  public boolean accept(final Path keyPath, final ObsObject summary) {
    if (keyPath.equals(qualifiedPath)) {
      return false;
    }
    if (summary.getObjectKey().endsWith(OBSConstants.OBS_FOLDER_SUFFIX)) {
      return false;
    }
    final boolean isDirectory = OBSCommonUtils.objectRepresentsDirectory(
        summary.getObjectKey(),
        summary.getMetadata().getContentLength());
    return !isDirectory;
  }

  /**
   * Accept no directory paths.
   *
   * @param keyPath qualified path to the entry
   * @param prefix common prefix in listing.
   * @return false, always.
   */
  @Override
  public boolean accept(final Path keyPath, final String prefix) {
    return false;
  }
}
/**
 * Accept all entries except the base path and those which map to OBS pseudo
 * directory markers.
 */
static class AcceptAllButSelfAndS3nDirs implements FileStatusAcceptor {
  /**
   * Base path.
   */
  private final Path qualifiedPath;

  /**
   * Constructor.
   *
   * @param path an already-qualified path.
   */
  AcceptAllButSelfAndS3nDirs(final Path path) {
    this.qualifiedPath = path;
  }

  /**
   * Reject a summary entry if the key path is the qualified Path, or its
   * key ends with the OBS folder suffix.
   *
   * @param keyPath key path of the entry
   * @param summary summary entry
   * @return true if the entry is accepted (i.e. that a status entry should
   *         be generated).
   */
  @Override
  public boolean accept(final Path keyPath, final ObsObject summary) {
    if (keyPath.equals(qualifiedPath)) {
      return false;
    }
    final boolean isFolderMarker = summary.getObjectKey()
        .endsWith(OBSConstants.OBS_FOLDER_SUFFIX);
    return !isFolderMarker;
  }

  /**
   * Accept all prefixes except the one for the base path, "self".
   *
   * @param keyPath qualified path to the entry
   * @param prefix common prefix in listing.
   * @return true if the entry is accepted (i.e. that a status entry should
   *         be generated).
   */
  @Override
  public boolean accept(final Path keyPath, final String prefix) {
    final boolean isSelf = keyPath.equals(qualifiedPath);
    return !isSelf;
  }
}
/**
* Wraps up object listing into a remote iterator which will ask for more
* listing data if needed.
*
* <p>This is a complex operation, especially the process to determine if
* there are more entries remaining. If there are no more results remaining in
* the (filtered) results of the current listing request, then another request
* is made
* <i>and those results filtered</i> before the iterator can declare that
* there is more data available.
*
* <p>The need to filter the results precludes the iterator from simply
* declaring that if the {@link ObjectListingIterator#hasNext()} is true then
* there are more results. Instead the next batch of results must be retrieved
* and filtered.
*
* <p>What does this mean? It means that remote requests to retrieve new
* batches of object listings are made in the {@link #hasNext()} call; the
* {@link #next()} call simply returns the filtered results of the last
* listing processed. However, do note that {@link #next()} calls {@link
* #hasNext()} during its operation. This is critical to ensure that a listing
* obtained through a sequence of {@link #next()} will complete with the same
* set of results as a classic {@code while (it.hasNext())} loop.
*
* <p>Thread safety: None.
*/
class FileStatusListingIterator implements RemoteIterator<FileStatus> {
/**
* Source of objects.
*/
private final ObjectListingIterator source;
/**
* Filter of paths from API call.
*/
private final PathFilter filter;
/**
* Filter of entries from file status.
*/
private final FileStatusAcceptor acceptor;
/**
* Request batch size.
*/
private int batchSize;
/**
* Iterator over the current set of results.
*/
private ListIterator<FileStatus> statusBatchIterator;
/**
* Create an iterator over file status entries.
*
* @param listPath the listing iterator from a listObjects call.
* @param pathFilter the filter on which paths to accept
* @param fileStatusAcceptor the class/predicate to decide which entries to
* accept in the listing based on the full file
* status.
* @throws IOException IO Problems
*/
FileStatusListingIterator(
final ObjectListingIterator listPath, final PathFilter pathFilter,
final FileStatusAcceptor fileStatusAcceptor)
throws IOException {
this.source = listPath;
this.filter = pathFilter;
this.acceptor = fileStatusAcceptor;
// build the first set of results. This will not trigger any
// remote IO, assuming the source iterator is in its initial
// iteration
requestNextBatch();
}
/**
* Report whether or not there is new data available. If there is data in
* the local filtered list, return true. Else: request more data util that
* condition is met, or there is no more remote listing data.
*
* @return true if a call to {@link #next()} will succeed.
* @throws IOException on any failure to request next batch
*/
@Override
public boolean hasNext() throws IOException {
return statusBatchIterator.hasNext() || requestNextBatch();
}
@Override
public FileStatus next() throws IOException {
if (!hasNext()) {
throw new NoSuchElementException();
}
return statusBatchIterator.next();
}
/**
* Try to retrieve another batch. Note that for the initial batch, {@link
* ObjectListingIterator} does not generate a request; it simply returns the
* initial set.
*
* @return true if a new batch was created.
* @throws IOException IO problems
*/
private boolean requestNextBatch() throws IOException {
// look for more object listing batches being available
while (source.hasNext()) {
// if available, retrieve it and build the next status
if (buildNextStatusBatch(source.next())) {
// this batch successfully generated entries matching
// the filters/acceptors;
// declare that the request was successful
return true;
} else {
LOG.debug(
"All entries in batch were filtered...continuing");
}
}
// if this code is reached, it means that all remaining
// object lists have been retrieved, and there are no new entries
// to return.
return false;
}
/**
* Build the next status batch from a listing.
*
* @param objects the next object listing
* @return true if this added any entries after filtering
*/
private boolean buildNextStatusBatch(final ObjectListing objects) {
// counters for debug logs
int added = 0;
int ignored = 0;
// list to fill in with results. Initial size will be list maximum.
List<FileStatus> stats =
new ArrayList<>(
objects.getObjects().size() + objects.getCommonPrefixes()
.size());
// objects
for (ObsObject summary : objects.getObjects()) {
String key = summary.getObjectKey();
Path keyPath = OBSCommonUtils.keyToQualifiedPath(owner, key);
if (LOG.isDebugEnabled()) {
LOG.debug("{}: {}", keyPath,
OBSCommonUtils.stringify(summary));
}
// Skip over keys that are ourselves and old OBS _$folder$ files
if (acceptor.accept(keyPath, summary) && filter.accept(
keyPath)) {
FileStatus status =
OBSCommonUtils.createFileStatus(
keyPath, summary,
owner.getDefaultBlockSize(keyPath),
owner.getUsername());
LOG.debug("Adding: {}", status);
stats.add(status);
added++;
} else {
LOG.debug("Ignoring: {}", keyPath);
ignored++;
}
}
// prefixes: always directories
for (ObsObject prefix : objects.getExtenedCommonPrefixes()) {
String key = prefix.getObjectKey();
Path keyPath = OBSCommonUtils.keyToQualifiedPath(owner, key);
if (acceptor.accept(keyPath, key) && filter.accept(keyPath)) {
long lastModified =
prefix.getMetadata().getLastModified() == null
? System.currentTimeMillis()
: OBSCommonUtils.dateToLong(
prefix.getMetadata().getLastModified());
FileStatus status = new OBSFileStatus(keyPath, lastModified,
lastModified, owner.getUsername());
LOG.debug("Adding directory: {}", status);
added++;
stats.add(status);
} else {
LOG.debug("Ignoring directory: {}", keyPath);
ignored++;
}
}
// finish up
batchSize = stats.size();
statusBatchIterator = stats.listIterator();
boolean hasNext = statusBatchIterator.hasNext();
LOG.debug(
"Added {} entries; ignored {}; hasNext={}; hasMoreObjects={}",
added,
ignored,
hasNext,
objects.isTruncated());
return hasNext;
}
/**
* Get the number of entries in the current batch.
*
* @return a number, possibly zero.
*/
public int getBatchSize() {
return batchSize;
}
}
/**
 * Remote iterator over the pages of an OBS `ListObjects` operation, asking
 * for more listing data when needed.
 *
 * <p>Behaviour in short:
 *
 * <p>1. The first {@link #next()} returns the listing obtained while the
 * instance was being constructed.
 *
 * <p>2. Every later {@link #next()} continues the listing through
 * {@link OBSCommonUtils#continueListObjects}.
 *
 * <p>3. {@link #hasNext()} is true before the first {@link #next()}, and
 * afterwards iff the listing obtained last was truncated.
 *
 * <p>Thread safety: none.
 */
class ObjectListingIterator implements RemoteIterator<ObjectListing> {
  /**
   * The path listed.
   */
  private final Path listPath;

  /**
   * The listing obtained last.
   */
  private ObjectListing objects;

  /**
   * True until the initial listing has been handed out.
   */
  private boolean firstListing = true;

  /**
   * Number of listings obtained so far; the initial one counts.
   */
  private int listingCount = 1;

  /**
   * Maximum keys in a request; only used for logging in this class.
   */
  private int maxKeys;

  /**
   * Constructor - runs {@link OBSCommonUtils#listObjects} on the request
   * right away, so a problem talking to the bucket surfaces here.
   *
   * @param path path of the listing
   * @param request initial request to make
   * @throws IOException on any failure to list objects
   */
  ObjectListingIterator(final Path path,
      final ListObjectsRequest request)
      throws IOException {
    listPath = path;
    maxKeys = owner.getMaxKeys();
    objects = OBSCommonUtils.listObjects(owner, request);
  }

  /**
   * There is data if the initial listing has not been handed out yet, or
   * if the listing obtained last was incomplete.
   */
  @Override
  public boolean hasNext() {
    if (firstListing) {
      return true;
    }
    return objects.isTruncated();
  }

  /**
   * Return the next listing. The first call hands out the initial listing
   * without issuing a request; later calls continue the listing and may
   * therefore block or fail.
   *
   * @return the next object listing.
   * @throws IOException if a query made of OBS fails.
   * @throws NoSuchElementException if there is no more data to list.
   */
  @Override
  public ObjectListing next() throws IOException {
    if (firstListing) {
      // nothing to fetch yet: just flip the flag so that the following
      // calls continue the listing.
      firstListing = false;
      return objects;
    }
    try {
      if (!objects.isTruncated()) {
        // the previous listing was complete: nothing left to ask for.
        throw new NoSuchElementException(
            "No more results in listing of " + listPath);
      }
      LOG.debug("[{}], Requesting next {} objects under {}",
          listingCount, maxKeys, listPath);
      objects = OBSCommonUtils.continueListObjects(owner,
          objects);
      listingCount++;
      LOG.debug("New listing status: {}", this);
    } catch (ObsException e) {
      throw OBSCommonUtils.translateException("listObjects()",
          listPath, e);
    }
    return objects;
  }

  @Override
  public String toString() {
    return new StringBuilder("Object listing iterator against ")
        .append(listPath)
        .append("; listing count ")
        .append(listingCount)
        .append("; isTruncated=")
        .append(objects.isTruncated())
        .toString();
  }
}
/**
 * Adapter which exposes a remote iterator of {@link FileStatus} entries as
 * a remote iterator of {@link LocatedFileStatus} entries.
 */
class LocatedFileStatusIterator
    implements RemoteIterator<LocatedFileStatus> {
  /**
   * The wrapped status iterator.
   */
  private final RemoteIterator<FileStatus> statusIterator;

  /**
   * Constructor.
   *
   * @param statusRemoteIterator an iterator over the remote status entries
   */
  LocatedFileStatusIterator(
      final RemoteIterator<FileStatus> statusRemoteIterator) {
    statusIterator = statusRemoteIterator;
  }

  @Override
  public boolean hasNext() throws IOException {
    // purely delegated; no look-ahead of its own
    return statusIterator.hasNext();
  }

  @Override
  public LocatedFileStatus next() throws IOException {
    final FileStatus plainStatus = statusIterator.next();
    return OBSCommonUtils.toLocatedFileStatus(owner, plainStatus);
  }
}
}

View File

@ -0,0 +1,350 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.fs.obs;
import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.UnsupportedEncodingException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URLDecoder;
import java.util.Objects;
import static org.apache.commons.lang3.StringUtils.equalsIgnoreCase;
/**
* Helper for OBS login.
*/
final class OBSLoginHelper {
/**
* login warning.
*/
public static final String LOGIN_WARNING =
"The Filesystem URI contains login details."
+ " This is insecure and may be unsupported in future.";
/**
* plus warning.
*/
public static final String PLUS_WARNING =
"Secret key contains a special character that should be URL encoded! "
+ "Attempting to resolve...";
/**
* defined plus unencoded char.
*/
public static final String PLUS_UNENCODED = "+";
/**
* defined plus encoded char.
*/
public static final String PLUS_ENCODED = "%2B";
/**
* Class logger.
*/
private static final Logger LOG = LoggerFactory.getLogger(
OBSLoginHelper.class);
private OBSLoginHelper() {
}
/**
 * Build the filesystem URI, keeping only the scheme and the host of the
 * URI passed in.
 *
 * @param uri filesystem uri
 * @return the URI to use as the basis for FS operation and qualifying paths.
 * @throws NullPointerException if the URI, its scheme or its host is null;
 *                              a null host next to a non-null authority
 *                              is reported with a hint about an unencoded
 *                              "/" in the password.
 * @throws IllegalArgumentException if the reduced URI string cannot be
 *                                  parsed.
 */
public static URI buildFSURI(final URI uri) {
  Objects.requireNonNull(uri, "null uri");
  final String scheme =
      Objects.requireNonNull(uri.getScheme(), "null uri.getScheme()");
  final String host = uri.getHost();
  if (host == null) {
    // an authority which did not parse into a host gets the longer hint
    final String reason = uri.getAuthority() != null
        ? "null uri host."
            + " This can be caused by unencoded / in the "
            + "password string"
        : "null uri host.";
    throw new NullPointerException(reason);
  }
  return URI.create(scheme + "://" + host);
}
/**
 * Render a URI for error messages using only its scheme, host and path.
 *
 * @param pathUri URI, may be null
 * @return a shortened schema://host/path value, or {@code "(null URI)"}
 */
public static String toString(final URI pathUri) {
  if (pathUri == null) {
    return "(null URI)";
  }
  final String scheme = pathUri.getScheme();
  final String host = pathUri.getHost();
  final String path = pathUri.getPath();
  return String.format("%s://%s/%s", scheme, host, path);
}
/**
 * Extract the login details from a URI and log {@link #LOGIN_WARNING} at
 * warn level when the URI does carry them.
 *
 * @param name URI of the filesystem
 * @return a login tuple, possibly empty.
 */
public static Login extractLoginDetailsWithWarnings(final URI name) {
  final Login details = extractLoginDetails(name);
  final boolean embeddedLogin = details.hasLogin();
  if (embeddedLogin) {
    LOG.warn(LOGIN_WARNING);
  }
  return details;
}
/**
 * Extract the login details from the authority of a URI.
 *
 * <p>Everything in front of the first {@code '@'} is taken as the login
 * section. {@code user:password} yields both values, with the password
 * URL-decoded after any literal {@code '+'} has been re-encoded; a section
 * without {@code ':'} yields a user with an empty password; a section
 * starting with {@code ':'} is treated as no login at all.
 *
 * @param name URI of the filesystem
 * @return a login tuple, possibly empty.
 */
public static Login extractLoginDetails(final URI name) {
  final String authority = name.getAuthority();
  if (authority == null) {
    return Login.EMPTY;
  }
  final int atIndex = authority.indexOf('@');
  if (atIndex < 0) {
    // no login section
    return Login.EMPTY;
  }
  final String userInfo = authority.substring(0, atIndex);
  final int colonIndex = userInfo.indexOf(':');
  if (colonIndex == 0) {
    // a password without a user does not count as a login
    return Login.EMPTY;
  }
  if (colonIndex < 0) {
    return new Login(userInfo, "");
  }

  final String user = userInfo.substring(0, colonIndex);
  String encodedPassword = userInfo.substring(colonIndex + 1);
  if (encodedPassword.contains(PLUS_UNENCODED)) {
    LOG.warn(PLUS_WARNING);
    // keep a literal '+' from being decoded into a space below
    encodedPassword =
        encodedPassword.replace(PLUS_UNENCODED, PLUS_ENCODED);
  }
  try {
    return new Login(user, URLDecoder.decode(encodedPassword, "UTF-8"));
  } catch (UnsupportedEncodingException e) {
    // this should never happen; translate it if it does.
    throw new RuntimeException(e);
  }
}
/**
 * Canonicalize the given URI by filling in the default port.
 *
 * <p>The URI is rebuilt only when it has no port and {@code defaultPort}
 * is greater than 0. The rebuilt URI keeps scheme, host, path, query and
 * fragment and carries no user-info component, which is how login
 * information gets stripped. In every other case the URI passed in is
 * returned as is, including any login information it holds.
 *
 * @param uri the URI to canonicalize
 * @param defaultPort default port to use in canonicalized URI if the input
 *                    URI has no port and this value is greater than 0
 * @return the rebuilt URI, or {@code uri} itself if nothing had to change.
 */
public static URI canonicalizeUri(final URI uri, final int defaultPort) {
  if (uri.getPort() != -1 || defaultPort <= 0) {
    // port already present, or no usable default: nothing to do
    return uri;
  }
  try {
    // reconstruct the uri with the default port set and no user info
    return new URI(
        uri.getScheme(),
        null,
        uri.getHost(),
        defaultPort,
        uri.getPath(),
        uri.getQuery(),
        uri.getFragment());
  } catch (URISyntaxException e) {
    // Should never happen! Keep the cause so that it can be diagnosed
    // if it ever does.
    throw new AssertionError(
        "Valid URI became unparseable: " + uri, e);
  }
}
/**
 * Check that a path belongs to this filesystem, ignoring authentication
 * details. See {@link OBSFileSystem#checkPath(Path)} for the operation of
 * this.
 *
 * <p>In outline:
 *
 * <ol>
 * <li>A path without a scheme is accepted without further checks.
 * <li>The filesystem URI is canonicalized.
 * <li>If the schemes match, the hosts are compared.
 * <li>If the path has no host while the filesystem has one, the default
 * filesystem URI stands in for the path when its scheme matches too.
 * </ol>
 * <p>
 * That all originates in the core FS; the sole change here being to use
 * {@link URI#getHost()} over {@link URI#getAuthority()}. Some of that code
 * looks a relic of the code anti-pattern of using "hdfs:file.txt" to define
 * the path without declaring the hostname. It's retained for compatibility.
 *
 * @param conf FS configuration
 * @param fsUri the FS URI
 * @param path path to check
 * @param defaultPort default port of FS
 * @throws IllegalArgumentException if the path does not belong to the
 *                                  filesystem
 */
public static void checkPath(final Configuration conf, final URI fsUri,
    final Path path, final int defaultPort) {
  URI candidate = path.toUri();
  final String candidateScheme = candidate.getScheme();
  if (candidateScheme == null) {
    // fs is relative
    return;
  }

  final URI fsCanonical = canonicalizeUri(fsUri, defaultPort);
  final String fsScheme = fsCanonical.getScheme();
  // hostname and scheme are not case sensitive in these checks
  if (equalsIgnoreCase(fsScheme, candidateScheme)) {
    final String fsHost = fsCanonical.getHost();
    if (candidate.getHost() == null && fsHost != null) {
      // the path names no host: borrow the default FS when its scheme
      // matches, otherwise the authority cannot be determined
      final URI defaultUri = FileSystem.getDefaultUri(conf);
      candidate = equalsIgnoreCase(fsScheme, defaultUri.getScheme())
          ? defaultUri
          : null;
    }
    if (candidate != null) {
      // canonicalize uri before comparing with this fs
      candidate = canonicalizeUri(candidate, defaultPort);
      if (equalsIgnoreCase(fsHost, candidate.getHost())) {
        return;
      }
    }
  }
  // make sure the exception strips out any auth details
  throw new IllegalArgumentException(
      "Wrong FS " + OBSLoginHelper.toString(candidate) + " -expected "
          + fsUri);
}
/**
 * Immutable holder of login details: user, password and an optional token.
 */
public static class Login {
  /**
   * Shared instance carrying no login details.
   */
  public static final Login EMPTY = new Login();

  /**
   * User name.
   */
  private final String user;

  /**
   * Password.
   */
  private final String password;

  /**
   * Login token; null unless supplied through the three-argument
   * constructor.
   */
  private final String token;

  /**
   * Create an instance with no login details. Calls to {@link #hasLogin()}
   * return false.
   */
  Login() {
    this("", "");
  }

  /**
   * Create an instance without a token.
   *
   * @param userName user name
   * @param passwd password
   */
  Login(final String userName, final String passwd) {
    this(userName, passwd, null);
  }

  /**
   * Create a fully specified instance.
   *
   * @param userName user name
   * @param passwd password
   * @param sessionToken login token
   */
  Login(final String userName, final String passwd,
      final String sessionToken) {
    user = userName;
    password = passwd;
    token = sessionToken;
  }

  /**
   * Predicate to verify login details are defined.
   *
   * @return true if the username is defined (not null, not empty).
   */
  public boolean hasLogin() {
    return StringUtils.isNotEmpty(user);
  }

  /**
   * Equality test matches user and password; the token is not compared.
   *
   * @param o other object
   * @return true if the objects are considered equivalent.
   */
  @Override
  public boolean equals(final Object o) {
    if (o == this) {
      return true;
    }
    if (o == null) {
      return false;
    }
    if (o.getClass() != getClass()) {
      return false;
    }
    final Login other = (Login) o;
    if (!Objects.equals(user, other.user)) {
      return false;
    }
    return Objects.equals(password, other.password);
  }

  /**
   * Hash code over the two fields which take part in {@link #equals}.
   */
  @Override
  public int hashCode() {
    return Objects.hash(user, password);
  }

  public String getUser() {
    return user;
  }

  public String getPassword() {
    return password;
  }

  public String getToken() {
    return token;
  }
}
}

View File

@ -0,0 +1,897 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.fs.obs;
import com.obs.services.exception.ObsException;
import com.obs.services.model.AbortMultipartUploadRequest;
import com.obs.services.model.CompleteMultipartUploadRequest;
import com.obs.services.model.CopyObjectRequest;
import com.obs.services.model.CopyObjectResult;
import com.obs.services.model.CopyPartRequest;
import com.obs.services.model.CopyPartResult;
import com.obs.services.model.DeleteObjectsRequest;
import com.obs.services.model.GetObjectMetadataRequest;
import com.obs.services.model.InitiateMultipartUploadRequest;
import com.obs.services.model.InitiateMultipartUploadResult;
import com.obs.services.model.KeyAndVersion;
import com.obs.services.model.ListObjectsRequest;
import com.obs.services.model.ObjectListing;
import com.obs.services.model.ObjectMetadata;
import com.obs.services.model.ObsObject;
import com.obs.services.model.PartEtag;
import com.obs.services.model.PutObjectRequest;
import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.fs.ContentSummary;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.ParentNotDirectoryException;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathIsNotEmptyDirectoryException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.InterruptedIOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.LinkedList;
import java.util.List;
import java.util.Optional;
import java.util.Set;
import java.util.TreeSet;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.Future;
/**
* Object bucket specific utils for {@link OBSFileSystem}.
*/
final class OBSObjectBucketUtils {
/**
* Class logger.
*/
private static final Logger LOG = LoggerFactory.getLogger(
OBSObjectBucketUtils.class);
private OBSObjectBucketUtils() {
}
/**
 * The inner rename operation.
 *
 * <p>Steps: the source must not be the root and must exist; an existing
 * destination is only tolerated when it is the very same file (no-op); a
 * missing destination requires an existing parent directory; a destination
 * below the source is refused. The source is then copied and deleted as a
 * file or as a folder, and finally a fake directory is created for the
 * source parent if that path no longer exists.
 *
 * @param owner OBS File System instance
 * @param src   path to be renamed
 * @param dst   new path after rename
 * @return true if the rename was carried out or source and destination are
 * the same file; false if the destination is a descendant of the source
 * @throws RenameFailedException if some criteria for a state changing rename
 *                               was not met. This means work didn't happen;
 *                               it's not something which is reported upstream
 *                               to the FileSystem APIs, for which the
 *                               semantics of "false" are pretty vague.
 * @throws FileNotFoundException there's no source file.
 * @throws IOException           on IO failure.
 * @throws ObsException          on failures inside the OBS SDK
 */
static boolean renameBasedOnObject(final OBSFileSystem owner,
    final Path src, final Path dst) throws RenameFailedException,
    FileNotFoundException, IOException,
    ObsException {
  String srcKey = OBSCommonUtils.pathToKey(owner, src);
  String dstKey = OBSCommonUtils.pathToKey(owner, dst);

  if (srcKey.isEmpty()) {
    LOG.error("rename: src [{}] is root directory", src);
    throw new IOException(src + " is root directory");
  }

  // get the source file status; this raises a FNFE if there is no source
  // file.
  FileStatus srcStatus = owner.getFileStatus(src);

  FileStatus dstStatus;
  try {
    dstStatus = owner.getFileStatus(dst);
    // if there is no destination entry, an exception is raised.
    // hence this code sequence can assume that there is something
    // at the end of the path; the only detail being what it is and
    // whether or not it can be the destination of the rename.
    if (dstStatus.isDirectory()) {
      String newDstKey = OBSCommonUtils.maybeAddTrailingSlash(dstKey);
      // The last element of the source key is what gets appended to the
      // destination directory. A parent at the root has an empty key and
      // therefore no separator to skip; unconditionally skipping one
      // character would cut off the first character of the name.
      String parentKey = OBSCommonUtils.pathToKey(owner, src.getParent());
      String filename = parentKey.isEmpty()
          ? srcKey
          : srcKey.substring(parentKey.length() + 1);
      newDstKey = newDstKey + filename;
      dstKey = newDstKey;
      // a missing entry raises a FNFE, which is the only way on from here
      dstStatus = owner.getFileStatus(
          OBSCommonUtils.keyToPath(dstKey));
      if (dstStatus.isDirectory()) {
        throw new RenameFailedException(src, dst,
            "new destination is an existed directory")
            .withExitCode(false);
      } else {
        throw new RenameFailedException(src, dst,
            "new destination is an existed file")
            .withExitCode(false);
      }
    } else {
      if (srcKey.equals(dstKey)) {
        LOG.warn(
            "rename: src and dest refer to the same file or"
                + " directory: {}",
            dst);
        return true;
      } else {
        throw new RenameFailedException(src, dst,
            "destination is an existed file")
            .withExitCode(false);
      }
    }
  } catch (FileNotFoundException e) {
    LOG.debug("rename: destination path {} not found", dst);
    // Parent must exist
    checkDestinationParent(owner, src, dst);
  }

  if (dstKey.startsWith(srcKey)
      && dstKey.charAt(srcKey.length()) == Path.SEPARATOR_CHAR) {
    LOG.error("rename: dest [{}] cannot be a descendant of src [{}]",
        dst, src);
    return false;
  }

  // Ok! Time to start
  if (srcStatus.isFile()) {
    LOG.debug("rename: renaming file {} to {}", src, dst);
    renameFile(owner, srcKey, dstKey, srcStatus);
  } else {
    LOG.debug("rename: renaming directory {} to {}", src, dst);
    // This is a directory to directory copy
    dstKey = OBSCommonUtils.maybeAddTrailingSlash(dstKey);
    srcKey = OBSCommonUtils.maybeAddTrailingSlash(srcKey);
    renameFolder(owner, srcKey, dstKey);
  }

  // Compare the parents by value: getParent() results are distinct
  // objects, so a reference comparison reports "different" even for the
  // same directory. With equal parents the renamed entry still lives
  // under the source parent and no fake directory is needed.
  final Path srcParent = src.getParent();
  if (srcParent != null && !srcParent.equals(dst.getParent())) {
    createFakeDirectoryIfNecessary(owner, srcParent);
  }

  return true;
}
/**
 * Verify that the parent of the rename destination is usable. A parent
 * which maps to the empty key is accepted without a lookup.
 *
 * @param owner OBS File System instance
 * @param src   path being renamed; only used in the failure report
 * @param dst   destination whose parent is checked
 * @throws ParentNotDirectoryException if the parent exists but is not a
 *                                     directory
 * @throws RenameFailedException       if the parent does not exist
 * @throws IOException                 on any other failure of the lookup
 */
private static void checkDestinationParent(final OBSFileSystem owner,
    final Path src,
    final Path dst) throws IOException {
  final Path dstParent = dst.getParent();
  if (OBSCommonUtils.pathToKey(owner, dstParent).isEmpty()) {
    return;
  }

  final FileStatus parentStatus;
  try {
    parentStatus = owner.getFileStatus(dstParent);
  } catch (FileNotFoundException e2) {
    throw new RenameFailedException(src, dst,
        "destination has no parent ");
  }
  if (!parentStatus.isDirectory()) {
    throw new ParentNotDirectoryException(
        "destination parent [" + dstParent
            + "] is not a directory");
  }
}
/**
 * Rename a single file: copy it to the destination key, then delete the
 * source through {@link #objectDelete}.
 *
 * @param owner     OBS File System instance
 * @param srcKey    source object key
 * @param dstKey    destination object key
 * @param srcStatus source object status
 * @throws IOException any problem with rename operation
 */
private static void renameFile(final OBSFileSystem owner,
    final String srcKey,
    final String dstKey,
    final FileStatus srcStatus)
    throws IOException {
  final long begin = System.nanoTime();

  copyFile(owner, srcKey, dstKey, srcStatus.getLen());
  objectDelete(owner, srcStatus, false);

  if (!LOG.isDebugEnabled()) {
    return;
  }
  // elapsed time in nanoseconds
  final long elapsed = System.nanoTime() - begin;
  final StringBuilder message = new StringBuilder("OBSFileSystem rename: ")
      .append(", {src=")
      .append(srcKey)
      .append(", dst=")
      .append(dstKey)
      .append(", delay=")
      .append(elapsed)
      .append("}");
  LOG.debug(message.toString());
}
/**
 * Delete the object, or the directory, described by a file status and
 * afterwards create a fake directory for its parent if that path no longer
 * exists.
 *
 * @param owner     OBS File System instance
 * @param status    status of the entry to delete
 * @param recursive whether a non-empty directory may be deleted
 * @return true once the delete went through; for the root directory the
 * outcome of {@link OBSCommonUtils#rejectRootDirectoryDelete}
 * @throws PathIsNotEmptyDirectoryException non-empty directory and
 *                                          {@code recursive} is false
 * @throws IOException                      on any other failure
 */
static boolean objectDelete(final OBSFileSystem owner,
    final FileStatus status,
    final boolean recursive) throws IOException {
  final Path target = status.getPath();
  String objectKey = OBSCommonUtils.pathToKey(owner, target);

  if (!status.isDirectory()) {
    LOG.debug("delete: Path is a file");
    OBSCommonUtils.deleteObject(owner, objectKey);
  } else {
    LOG.debug("delete: Path is a directory: {} - recursive {}", target,
        recursive);
    objectKey = OBSCommonUtils.maybeAddTrailingSlash(objectKey);
    // make sure of the trailing slash even if the helper left it off
    if (!objectKey.endsWith("/")) {
      objectKey = objectKey + "/";
    }

    final boolean emptyDir = OBSCommonUtils.isFolderEmpty(owner, objectKey);
    if (objectKey.equals("/")) {
      return OBSCommonUtils.rejectRootDirectoryDelete(
          owner.getBucket(), emptyDir, recursive);
    }

    if (!(recursive || emptyDir)) {
      throw new PathIsNotEmptyDirectoryException(target.toString());
    }

    if (emptyDir) {
      LOG.debug(
          "delete: Deleting fake empty directory {} - recursive {}",
          target, recursive);
      OBSCommonUtils.deleteObject(owner, objectKey);
    } else {
      LOG.debug(
          "delete: Deleting objects for directory prefix {} "
              + "- recursive {}",
          target, recursive);
      deleteNonEmptyDir(owner, recursive, objectKey);
    }
  }

  final Path parent = target.getParent();
  if (parent != null) {
    createFakeDirectoryIfNecessary(owner, parent);
  }
  return true;
}
/**
 * Rename a folder: create the destination directory object, copy every
 * object listed under the source prefix asynchronously, then delete the
 * copied source keys together with the source prefix itself.
 *
 * @param owner  OBS File System instance
 * @param srcKey source folder key
 * @param dstKey destination folder key
 * @throws IOException any problem with rename folder
 */
static void renameFolder(final OBSFileSystem owner, final String srcKey,
    final String dstKey)
    throws IOException {
  final long begin = System.nanoTime();

  final List<KeyAndVersion> keysToDelete = new ArrayList<>();

  // the destination directory object is written before any child is copied
  createFakeDirectory(owner, dstKey);

  final ListObjectsRequest listRequest = new ListObjectsRequest();
  listRequest.setBucketName(owner.getBucket());
  listRequest.setPrefix(srcKey);
  listRequest.setMaxKeys(owner.getMaxKeys());
  ObjectListing listing = OBSCommonUtils.listObjects(owner, listRequest);

  final List<Future<CopyObjectResult>> pendingCopies = new LinkedList<>();
  for (;;) {
    for (ObsObject entry : listing.getObjects()) {
      final String entryKey = entry.getObjectKey();
      if (entryKey.equals(srcKey)) {
        // skip prefix itself
        continue;
      }

      keysToDelete.add(new KeyAndVersion(entryKey));
      // same relative name below the destination prefix
      final String copyTarget =
          dstKey + entryKey.substring(srcKey.length());
      pendingCopies.add(
          copyFileAsync(owner, entryKey, copyTarget,
              entry.getMetadata().getContentLength()));

      // NOTE(review): keysToDelete is never cleared in this method, so
      // this equality can hold at most once and the delete request built
      // at the end carries every key - confirm that
      // OBSCommonUtils.deleteObjects copes with more than
      // getMaxEntriesToDelete() keys.
      if (keysToDelete.size() == owner.getMaxEntriesToDelete()) {
        waitAllCopyFinished(pendingCopies);
        pendingCopies.clear();
      }
    }

    if (!listing.isTruncated()) {
      // last page: drain whatever copies are still in flight
      if (!keysToDelete.isEmpty()) {
        waitAllCopyFinished(pendingCopies);
        pendingCopies.clear();
      }
      break;
    }
    listing = OBSCommonUtils.continueListObjects(owner, listing);
  }

  // the source prefix itself goes last
  keysToDelete.add(new KeyAndVersion(srcKey));

  final DeleteObjectsRequest deleteRequest = new DeleteObjectsRequest(
      owner.getBucket());
  deleteRequest.setKeyAndVersions(
      keysToDelete.toArray(new KeyAndVersion[0]));
  OBSCommonUtils.deleteObjects(owner, deleteRequest);

  if (LOG.isDebugEnabled()) {
    // elapsed time in nanoseconds
    final long elapsed = System.nanoTime() - begin;
    final String message = "OBSFileSystem rename: "
        + ", {src="
        + srcKey
        + ", dst="
        + dstKey
        + ", delay="
        + elapsed
        + "}";
    LOG.debug(message);
  }
}
/**
 * Block until every copy in the list has completed.
 *
 * @param copyFutures futures of the copies to wait for
 * @throws InterruptedIOException if the waiting thread is interrupted; the
 *                                interrupt status of the thread is
 *                                restored and the original exception is
 *                                attached as cause
 * @throws IOException            if a copy failed; all futures in the list
 *                                are cancelled before the failure is
 *                                reported
 */
private static void waitAllCopyFinished(
    final List<Future<CopyObjectResult>> copyFutures)
    throws IOException {
  try {
    for (Future<CopyObjectResult> copyFuture : copyFutures) {
      copyFuture.get();
    }
  } catch (InterruptedException e) {
    LOG.warn("Interrupted while copying objects (copy)");
    // catching InterruptedException clears the flag: set it again so
    // that code further up the stack can still observe the interrupt
    Thread.currentThread().interrupt();
    InterruptedIOException interrupted = new InterruptedIOException(
        "Interrupted while copying objects (copy)");
    interrupted.initCause(e);
    throw interrupted;
  } catch (ExecutionException e) {
    // one copy failed: give up on the rest of the list
    for (Future<CopyObjectResult> future : copyFutures) {
      future.cancel(true);
    }

    throw OBSCommonUtils.extractException(
        "waitAllCopyFinished", copyFutures.toString(), e);
  }
}
/**
 * Fetch the metadata of an object and count one read operation in the
 * scheme statistics. The SSE-C header is attached to the request when
 * SSE-C is enabled on the filesystem.
 *
 * @param owner OBS File System instance
 * @param key   key
 * @return the metadata
 */
protected static ObjectMetadata getObjectMetadata(final OBSFileSystem owner,
    final String key) {
  final GetObjectMetadataRequest metadataRequest =
      new GetObjectMetadataRequest();
  metadataRequest.setBucketName(owner.getBucket());
  metadataRequest.setObjectKey(key);
  if (owner.getSse().isSseCEnable()) {
    metadataRequest.setSseCHeader(owner.getSse().getSseCHeader());
  }

  final ObjectMetadata fetched =
      owner.getObsClient().getObjectMetadata(metadataRequest);
  // counted only once the call has returned
  owner.getSchemeStatistics().incrementReadOps(1);
  return fetched;
}
/**
 * Create a new object metadata instance. The content length is only set
 * when the given length is not negative.
 *
 * @param length length of data to set in header.
 * @return a new metadata instance
 */
static ObjectMetadata newObjectMetadata(final long length) {
  final ObjectMetadata metadata = new ObjectMetadata();
  if (length < 0) {
    // negative length: leave the content length unset
    return metadata;
  }
  metadata.setContentLength(length);
  return metadata;
}
/**
 * Delete the objects listed under a directory key, page by page, followed
 * by the directory key itself.
 *
 * @param owner     OBS File System instance
 * @param recursive true lists without a delimiter, false lists with "/"
 * @param key       directory key whose contents are deleted
 * @throws IOException on any failure to list or delete
 */
private static void deleteNonEmptyDir(final OBSFileSystem owner,
    final boolean recursive, final String key) throws IOException {
  final String delimiter;
  if (recursive) {
    delimiter = null;
  } else {
    delimiter = "/";
  }

  ObjectListing listing = OBSCommonUtils.listObjects(owner,
      OBSCommonUtils.createListObjectsRequest(owner, key, delimiter));
  final List<KeyAndVersion> pendingKeys =
      new ArrayList<>(listing.getObjects().size());
  for (;;) {
    for (ObsObject entry : listing.getObjects()) {
      final String entryKey = entry.getObjectKey();
      if (entryKey.equals(key)) {
        // skip prefix itself
        continue;
      }

      pendingKeys.add(new KeyAndVersion(entryKey));
      LOG.debug("Got object to delete {}", entryKey);

      // hand over a batch as soon as it reaches the configured size
      if (pendingKeys.size() == owner.getMaxEntriesToDelete()) {
        OBSCommonUtils.removeKeys(owner, pendingKeys, true, true);
      }
    }

    if (!listing.isTruncated()) {
      // last page: the directory key goes out with the final batch
      pendingKeys.add(new KeyAndVersion(key));
      OBSCommonUtils.removeKeys(owner, pendingKeys, false, true);
      break;
    }
    listing = OBSCommonUtils.continueListObjects(owner, listing);
  }
}
/**
 * Create a fake directory for a path unless the path maps to the empty key
 * or already exists.
 *
 * @param owner OBS File System instance
 * @param f     path which should exist as a directory
 * @throws IOException  on IO failure
 * @throws ObsException on failures inside the OBS SDK
 */
static void createFakeDirectoryIfNecessary(final OBSFileSystem owner,
    final Path f)
    throws IOException, ObsException {
  final String directoryKey = OBSCommonUtils.pathToKey(owner, f);
  // the existence probe is only made for a non-empty key
  if (directoryKey.isEmpty() || owner.exists(f)) {
    return;
  }
  LOG.debug("Creating new fake directory at {}", f);
  createFakeDirectory(owner, directoryKey);
}
/**
 * Create the empty object which stands for a directory; the object name is
 * passed through {@link OBSCommonUtils#maybeAddTrailingSlash} first.
 *
 * @param owner      OBS File System instance
 * @param objectName key of the directory
 * @throws ObsException on failures inside the OBS SDK
 * @throws IOException  on IO failure
 */
static void createFakeDirectory(final OBSFileSystem owner,
    final String objectName)
    throws ObsException, IOException {
  final String directoryObjectName =
      OBSCommonUtils.maybeAddTrailingSlash(objectName);
  createEmptyObject(owner, directoryObjectName);
}
/**
 * Create an empty object that represents an empty directory, retrying on
 * {@link ObsException}.
 *
 * <p>Attempts inside the loop are followed by a pause of
 * {@code OBSCommonUtils.DELAY_TIME} when they fail; one last attempt is
 * made after the loop and its failure is passed on to the caller. If the
 * thread is interrupted during a pause, retrying stops: the interrupt
 * status is restored and the failure of the preceding attempt is thrown.
 *
 * @param owner      OBS File System instance
 * @param objectName key of the object to create
 * @throws ObsException on failures inside the OBS SDK
 * @throws IOException  on IO failure
 */
private static void createEmptyObject(final OBSFileSystem owner,
    final String objectName)
    throws ObsException, IOException {
  for (int retryTime = 1;
      retryTime < OBSCommonUtils.MAX_RETRY_TIME; retryTime++) {
    try {
      innerCreateEmptyObject(owner, objectName);
      return;
    } catch (ObsException e) {
      LOG.warn("Failed to create empty object [{}], retry time [{}], "
          + "exception [{}]", objectName, retryTime, e);
      try {
        Thread.sleep(OBSCommonUtils.DELAY_TIME);
      } catch (InterruptedException ie) {
        // the exception thrown below says nothing about the interrupt,
        // so keep the flag set for whoever handles it
        Thread.currentThread().interrupt();
        throw e;
      }
    }
  }

  // last attempt: failures are no longer retried
  innerCreateEmptyObject(owner, objectName);
}
/**
 * Single attempt to create an empty object that represents an empty
 * directory. On success one write operation and the length of the request
 * are added to the scheme statistics.
 *
 * @param owner      OBS File System instance
 * @param objectName key of the object to create
 * @throws ObsException on failures inside the OBS SDK
 * @throws IOException  on IO failure
 */
private static void innerCreateEmptyObject(final OBSFileSystem owner,
    final String objectName)
    throws ObsException, IOException {
  // a stream which is at end-of-file from the start
  final InputStream emptyStream =
      new InputStream() {
        @Override
        public int read() {
          return -1;
        }
      };

  final PutObjectRequest request = OBSCommonUtils
      .newPutObjectRequest(owner, objectName, newObjectMetadata(0L),
          emptyStream);

  // length reported to the statistics: the file's if the request carries
  // one, otherwise the content length from the metadata
  final long bytes = request.getFile() != null
      ? request.getFile().length()
      : request.getMetadata().getContentLength();

  try {
    owner.getObsClient().putObject(request);
    owner.getSchemeStatistics().incrementWriteOps(1);
    owner.getSchemeStatistics().incrementBytesWritten(bytes);
  } finally {
    emptyStream.close();
  }
}
/**
 * Copy a single object in the bucket via a COPY operation, retrying on
 * IO failure.
 *
 * <p>The loop makes up to {@code MAX_RETRY_TIME - 1} attempts, sleeping
 * {@code DELAY_TIME} after each failed one; a final attempt is made after
 * the loop and its exception, if any, propagates to the caller.
 *
 * @param owner  OBS File System instance
 * @param srcKey source object path
 * @param dstKey destination object path
 * @param size   object size
 * @throws InterruptedIOException the operation was interrupted
 * @throws IOException            Other IO problems; also rethrown (the
 *                                failure of the preceding attempt) when
 *                                the thread is interrupted while waiting
 *                                to retry
 */
private static void copyFile(final OBSFileSystem owner, final String srcKey,
    final String dstKey, final long size)
    throws IOException, InterruptedIOException {
  for (int retryTime = 1;
      retryTime < OBSCommonUtils.MAX_RETRY_TIME; retryTime++) {
    try {
      innerCopyFile(owner, srcKey, dstKey, size);
      return;
    } catch (InterruptedIOException e) {
      // never retry an interrupted copy
      throw e;
    } catch (IOException e) {
      LOG.warn(
          "Failed to copy file from [{}] to [{}] with size [{}], "
              + "retry time [{}], exception [{}]", srcKey, dstKey,
          size, retryTime, e);
      try {
        Thread.sleep(OBSCommonUtils.DELAY_TIME);
      } catch (InterruptedException ie) {
        // sleep() cleared the interrupt status; restore it so callers
        // further up the stack can still observe the interruption.
        Thread.currentThread().interrupt();
        throw e;
      }
    }
  }
  // last attempt: let any failure propagate
  innerCopyFile(owner, srcKey, dstKey, size);
}
/**
 * Copy one object inside the bucket in a single attempt. Objects no larger
 * than the owner's copy part size are copied with one COPY request; larger
 * objects are copied part by part through a multipart upload.
 *
 * @param owner  OBS File System instance
 * @param srcKey source object key
 * @param dstKey destination object key
 * @param size   object size
 * @throws IOException on failure; an {@code ObsException} is converted by
 *                     {@code OBSCommonUtils.translateException}
 */
private static void innerCopyFile(final OBSFileSystem owner,
    final String srcKey,
    final String dstKey, final long size)
    throws IOException {
  LOG.debug("copyFile {} -> {} ", srcKey, dstKey);
  try {
    if (size <= owner.getCopyPartSize()) {
      // fits in one part: plain COPY carrying the cloned metadata
      ObjectMetadata sourceMeta = getObjectMetadata(owner, srcKey);
      ObjectMetadata targetMeta = cloneObjectMetadata(sourceMeta);
      final CopyObjectRequest copyRequest =
          new CopyObjectRequest(owner.getBucket(), srcKey,
              owner.getBucket(), dstKey);
      copyRequest.setAcl(owner.getCannedACL());
      copyRequest.setNewObjectMetadata(targetMeta);
      if (owner.getSse().isSseCEnable()) {
        copyRequest.setSseCHeader(
            owner.getSse().getSseCHeader());
        copyRequest.setSseCHeaderSource(
            owner.getSse().getSseCHeader());
      } else if (owner.getSse().isSseKmsEnable()) {
        copyRequest.setSseKmsHeader(
            owner.getSse().getSseKmsHeader());
      }
      owner.getObsClient().copyObject(copyRequest);
    } else {
      // step 1: open the multipart upload on the destination key
      InitiateMultipartUploadRequest initRequest
          = new InitiateMultipartUploadRequest(owner.getBucket(),
          dstKey);
      initRequest.setAcl(owner.getCannedACL());
      if (owner.getSse().isSseCEnable()) {
        initRequest.setSseCHeader(owner.getSse().getSseCHeader());
      } else if (owner.getSse().isSseKmsEnable()) {
        initRequest.setSseKmsHeader(owner.getSse().getSseKmsHeader());
      }
      InitiateMultipartUploadResult initResult = owner.getObsClient()
          .initiateMultipartUpload(initRequest);
      final String uploadId = initResult.getUploadId();
      LOG.debug("Multipart copy file, uploadId: {}", uploadId);
      // step 2: copy every part and collect the etags
      long parts = calPartCount(owner.getCopyPartSize(), size);
      final List<PartEtag> etags =
          getCopyFilePartEtags(owner, srcKey, dstKey, size, uploadId,
              parts);
      // step 3: stitch the parts together
      CompleteMultipartUploadRequest completeRequest =
          new CompleteMultipartUploadRequest(owner.getBucket(),
              dstKey, uploadId, etags);
      owner.getObsClient()
          .completeMultipartUpload(completeRequest);
    }
    owner.getSchemeStatistics().incrementWriteOps(1);
  } catch (ObsException e) {
    throw OBSCommonUtils.translateException(
        "copyFile(" + srcKey + ", " + dstKey + ")", srcKey, e);
  }
}
/**
 * Compute how many parts of {@code partSize} bytes are needed to cover
 * {@code cloudSize} bytes, i.e. the quotient rounded up when there is a
 * remainder.
 *
 * @param partSize  size of one copy part
 * @param cloudSize total size of the object
 * @return the part count, narrowed to {@code int}
 */
static int calPartCount(final long partSize, final long cloudSize) {
  final long fullParts = cloudSize / partSize;
  final long leftover = cloudSize % partSize;
  if (leftover == 0) {
    return (int) fullParts;
  }
  // a trailing partial part needs one more slot
  return (int) (fullParts + 1);
}
/**
 * Submit all part-copy tasks of a multipart copy, wait for them to finish
 * and return the part etags sorted by ascending part number.
 *
 * <p>If any task fails, the remaining tasks are cancelled and the
 * multipart upload is aborted before the failure is rethrown. If the
 * waiting thread is interrupted, the remaining tasks are cancelled and the
 * interrupt status is restored.
 *
 * @param owner      OBS File System instance
 * @param srcKey     source object key
 * @param dstKey     destination object key
 * @param objectSize size of the source object
 * @param uploadId   id of the multipart upload the parts belong to
 * @param partCount  number of parts to copy
 * @return the etags of all copied parts, in part-number order
 * @throws InterruptedIOException if interrupted while waiting
 * @throws IOException            if a part copy fails
 */
static List<PartEtag> getCopyFilePartEtags(final OBSFileSystem owner,
    final String srcKey,
    final String dstKey,
    final long objectSize,
    final String uploadId,
    final long partCount)
    throws IOException {
  // tasks append concurrently, hence the synchronized wrapper
  final List<PartEtag> partEtags = Collections.synchronizedList(
      new ArrayList<>());
  final List<Future<?>> partCopyFutures = new ArrayList<>();
  submitCopyPartTasks(owner, srcKey, dstKey, objectSize, uploadId,
      partCount, partEtags, partCopyFutures);

  // wait the tasks for completing
  try {
    for (Future<?> partCopyFuture : partCopyFutures) {
      partCopyFuture.get();
    }
  } catch (InterruptedException e) {
    LOG.warn("Interrupted while copying objects (copy)");
    // the result is being abandoned: stop the outstanding part copies
    for (Future<?> future : partCopyFutures) {
      future.cancel(true);
    }
    // keep the interruption visible to callers up the stack
    Thread.currentThread().interrupt();
    throw new InterruptedIOException(
        "Interrupted while copying objects (copy)");
  } catch (ExecutionException e) {
    LOG.error("Multipart copy file exception.", e);
    for (Future<?> future : partCopyFutures) {
      future.cancel(true);
    }
    try {
      owner.getObsClient()
          .abortMultipartUpload(
              new AbortMultipartUploadRequest(owner.getBucket(), dstKey,
                  uploadId));
    } catch (ObsException abortFailure) {
      // do not let a failed clean-up hide the real copy failure
      LOG.warn("Failed to abort multipart copy with id '{}' to {}",
          uploadId, dstKey, abortFailure);
    }
    throw OBSCommonUtils.extractException(
        "Multi-part copy with id '" + uploadId + "' from " + srcKey
            + " to " + dstKey, dstKey, e);
  }

  // Make part numbers in ascending order
  partEtags.sort(Comparator.comparingInt(PartEtag::getPartNumber));
  return partEtags;
}
/**
 * Queue one copy-part task per part on the owner's bounded copy-part
 * thread pool. Each task copies its byte range of the source object into
 * the given multipart upload and records the resulting etag.
 *
 * @param owner           OBS File System instance
 * @param srcKey          source object key
 * @param dstKey          destination object key
 * @param objectSize      size of the source object
 * @param uploadId        id of the multipart upload
 * @param partCount       number of parts to copy
 * @param partEtags       list receiving the etag of every finished part
 * @param partCopyFutures list receiving the future of every queued task
 */
@SuppressWarnings("checkstyle:parameternumber")
private static void submitCopyPartTasks(final OBSFileSystem owner,
    final String srcKey,
    final String dstKey,
    final long objectSize,
    final String uploadId,
    final long partCount,
    final List<PartEtag> partEtags,
    final List<Future<?>> partCopyFutures) {
  for (int index = 0; index < partCount; index++) {
    final long rangeStart = index * owner.getCopyPartSize();
    final long rangeEnd;
    if (index + 1 == partCount) {
      // the last part ends at the final byte of the object
      rangeEnd = objectSize - 1;
    } else {
      rangeEnd = rangeStart + owner.getCopyPartSize() - 1;
    }
    // part numbers are 1-based
    final int partNumber = index + 1;
    partCopyFutures.add(
        owner.getBoundedCopyPartThreadPool().submit(() -> {
          CopyPartRequest partRequest = new CopyPartRequest();
          partRequest.setUploadId(uploadId);
          partRequest.setSourceBucketName(owner.getBucket());
          partRequest.setSourceObjectKey(srcKey);
          partRequest.setDestinationBucketName(owner.getBucket());
          partRequest.setDestinationObjectKey(dstKey);
          partRequest.setByteRangeStart(rangeStart);
          partRequest.setByteRangeEnd(rangeEnd);
          partRequest.setPartNumber(partNumber);
          if (owner.getSse().isSseCEnable()) {
            partRequest.setSseCHeaderSource(
                owner.getSse().getSseCHeader());
            partRequest.setSseCHeaderDestination(
                owner.getSse().getSseCHeader());
          }
          CopyPartResult partResult = owner.getObsClient()
              .copyPart(partRequest);
          partEtags.add(
              new PartEtag(partResult.getEtag(),
                  partResult.getPartNumber()));
          LOG.debug(
              "Multipart copy file, uploadId: {}, Part#{} done.",
              uploadId, partNumber);
        }));
  }
}
/**
 * Build a reduced copy of an {@link ObjectMetadata}: only the content
 * length and, when set, the content encoding are carried over.
 * {@link ObjectMetadata#clone()} is deliberately not used so that
 * unnecessary headers are not copied.
 *
 * @param source the {@link ObjectMetadata} to copy
 * @return a copy of {@link ObjectMetadata} with only relevant attributes
 */
private static ObjectMetadata cloneObjectMetadata(
    final ObjectMetadata source) {
  // Brittle by design: attributes added to ObjectMetadata in the future
  // are not copied unless they are set explicitly here.
  final ObjectMetadata copy = newObjectMetadata(source.getContentLength());
  if (source.getContentEncoding() == null) {
    return copy;
  }
  copy.setContentEncoding(source.getContentEncoding());
  return copy;
}
/**
 * Resolve the status of a path in an object bucket.
 *
 * <p>For a non-empty key the method probes, in order: the object stored
 * under the exact key, then (if the key has no trailing slash) the object
 * stored under {@code key + "/"}. If neither probe returns a status, or the
 * key is empty, it finally asks {@code OBSCommonUtils.innerIsFolderEmpty}
 * and reports a directory when that call succeeds. At every step an
 * {@code ObsException} whose response code is {@code NOT_FOUND_CODE} is
 * treated as "keep looking"; any other {@code ObsException} is translated
 * and thrown.
 *
 * @param owner OBS File System instance
 * @param f     path to look up
 * @return the status of the file or directory
 * @throws FileNotFoundException if no probe found the path
 * @throws IOException           on any other failure
 */
static OBSFileStatus innerGetObjectStatus(final OBSFileSystem owner,
final Path f)
throws IOException {
final Path path = OBSCommonUtils.qualify(owner, f);
String key = OBSCommonUtils.pathToKey(owner, path);
LOG.debug("Getting path status for {} ({})", path, key);
if (!StringUtils.isEmpty(key)) {
// Probe 1: the object stored under the exact key.
try {
ObjectMetadata meta = getObjectMetadata(owner, key);
if (OBSCommonUtils.objectRepresentsDirectory(key,
meta.getContentLength())) {
LOG.debug("Found exact file: fake directory");
return new OBSFileStatus(path, owner.getUsername());
} else {
LOG.debug("Found exact file: normal file");
return new OBSFileStatus(meta.getContentLength(),
OBSCommonUtils.dateToLong(meta.getLastModified()),
path, owner.getDefaultBlockSize(path),
owner.getUsername());
}
} catch (ObsException e) {
// "not found" falls through to the next probe
if (e.getResponseCode() != OBSCommonUtils.NOT_FOUND_CODE) {
throw OBSCommonUtils.translateException("getFileStatus",
path, e);
}
}
// Probe 2: the same key with a trailing slash appended.
if (!key.endsWith("/")) {
String newKey = key + "/";
try {
ObjectMetadata meta = getObjectMetadata(owner, newKey);
if (OBSCommonUtils.objectRepresentsDirectory(newKey,
meta.getContentLength())) {
LOG.debug("Found file (with /): fake directory");
return new OBSFileStatus(path, owner.getUsername());
} else {
LOG.debug(
"Found file (with /): real file? should not "
+ "happen: {}",
key);
return new OBSFileStatus(meta.getContentLength(),
OBSCommonUtils.dateToLong(meta.getLastModified()),
path,
owner.getDefaultBlockSize(path),
owner.getUsername());
}
} catch (ObsException e) {
// "not found" falls through to the final probe
if (e.getResponseCode() != OBSCommonUtils.NOT_FOUND_CODE) {
throw OBSCommonUtils.translateException("getFileStatus",
newKey, e);
}
}
}
}
// Probe 3: reached for the empty key or when both lookups above missed.
// The boolean result is only logged; a successful call is what makes the
// path count as a directory here.
try {
boolean isEmpty = OBSCommonUtils.innerIsFolderEmpty(owner, key);
LOG.debug("Is dir ({}) empty? {}", path, isEmpty);
return new OBSFileStatus(path, owner.getUsername());
} catch (ObsException e) {
if (e.getResponseCode() != OBSCommonUtils.NOT_FOUND_CODE) {
throw OBSCommonUtils.translateException("getFileStatus", key,
e);
}
}
LOG.debug("Not Found: {}", path);
throw new FileNotFoundException("No such file or directory: " + path);
}
/**
 * Compute the content summary of a directory in an object bucket by
 * listing every object under its prefix.
 *
 * <p>Keys that do not end with "/" count as files and contribute their
 * content length; directories are collected from the listed keys and
 * common prefixes via {@code getDirectories}. The directory count starts
 * at one.
 *
 * @param owner OBS File System instance
 * @param key   key of the directory
 * @return the content summary
 * @throws IOException on IO failure
 */
static ContentSummary getDirectoryContentSummary(final OBSFileSystem owner,
    final String key) throws IOException {
  final String newKey = OBSCommonUtils.maybeAddTrailingSlash(key);
  long totalLength = 0;
  long fileCount = 0;
  long directoryCount = 1;
  LOG.debug("Summary key {}", newKey);
  ListObjectsRequest request = new ListObjectsRequest();
  request.setBucketName(owner.getBucket());
  request.setPrefix(newKey);
  Set<String> directories = new TreeSet<>();
  request.setMaxKeys(owner.getMaxKeys());
  // walk the listing page by page until it is no longer truncated
  for (ObjectListing page = OBSCommonUtils.listObjects(owner, request);;
      page = OBSCommonUtils.continueListObjects(owner, page)) {
    final boolean nothingListed = page.getCommonPrefixes().isEmpty()
        && page.getObjects().isEmpty();
    if (!nothingListed) {
      if (LOG.isDebugEnabled()) {
        LOG.debug("Found path as directory (with /): {}/{}",
            page.getCommonPrefixes().size(),
            page.getObjects().size());
      }
      for (String prefix : page.getCommonPrefixes()) {
        LOG.debug("Objects in folder [" + prefix + "]:");
        getDirectories(prefix, newKey, directories);
      }
      for (ObsObject obj : page.getObjects()) {
        LOG.debug("Summary: {} {}", obj.getObjectKey(),
            obj.getMetadata().getContentLength());
        if (!obj.getObjectKey().endsWith("/")) {
          totalLength += obj.getMetadata().getContentLength();
          fileCount += 1;
        }
        getDirectories(obj.getObjectKey(), newKey, directories);
      }
    }
    if (!page.isTruncated()) {
      break;
    }
  }
  directoryCount += directories.size();
  LOG.debug(String.format(
      "file size [%d] - file count [%d] - directory count [%d] - "
          + "file path [%s]",
      totalLength,
      fileCount, directoryCount, newKey));
  return new ContentSummary.Builder().length(totalLength)
      .fileCount(fileCount).directoryCount(directoryCount)
      .spaceConsumed(totalLength).build();
}
/**
 * Collect the directories implied by a key: the key itself when it ends
 * with "/", followed by each of its ancestors, stopping before the source
 * path is reached. Only paths that compare greater than the source path
 * are considered.
 *
 * @param key         listed object key or common prefix
 * @param sourceKey   key of the directory being summarised
 * @param directories set receiving the directory paths
 */
private static void getDirectories(final String key, final String sourceKey,
    final Set<String> directories) {
  Path current = new Path(key);
  final Path base = new Path(sourceKey);
  // the key itself goes in first when it denotes a directory
  if (key.endsWith("/") && current.compareTo(base) > 0) {
    directories.add(current.toString());
  }
  // then climb towards the base, recording every intermediate ancestor
  while (current.compareTo(base) > 0) {
    Optional<Path> parent = current.getOptionalParentPath();
    if (!parent.isPresent()) {
      return;
    }
    current = parent.get();
    if (current.compareTo(base) == 0) {
      return;
    }
    directories.add(current.toString());
  }
}
/**
 * Submit a {@code copyFile} call to the owner's bounded copy thread pool.
 *
 * @param owner  OBS File System instance
 * @param srcKey source object key
 * @param dstKey destination object key
 * @param size   object size
 * @return a future for the submitted copy; the task always yields
 *         {@code null}, so the future is only useful for waiting on
 *         completion and for surfacing failures
 */
private static Future<CopyObjectResult> copyFileAsync(
final OBSFileSystem owner,
final String srcKey,
final String dstKey, final long size) {
return owner.getBoundedCopyThreadPool().submit(() -> {
copyFile(owner, srcKey, dstKey, size);
// no CopyObjectResult is produced by copyFile
return null;
});
}
}

View File

@ -0,0 +1,745 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.fs.obs;
import com.obs.services.exception.ObsException;
import com.obs.services.model.KeyAndVersion;
import com.obs.services.model.ListObjectsRequest;
import com.obs.services.model.ObjectListing;
import com.obs.services.model.ObsObject;
import com.obs.services.model.fs.GetAttributeRequest;
import com.obs.services.model.fs.NewFolderRequest;
import com.obs.services.model.fs.ObsFSAttribute;
import com.obs.services.model.fs.RenameRequest;
import org.apache.hadoop.fs.ContentSummary;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.ParentNotDirectoryException;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathIsNotEmptyDirectoryException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
/**
* Posix bucket specific utils for {@link OBSFileSystem}.
*/
final class OBSPosixBucketUtils {
/**
* Class logger.
*/
private static final Logger LOG = LoggerFactory.getLogger(
OBSPosixBucketUtils.class);
// Private constructor: this final class only offers static helpers.
private OBSPosixBucketUtils() {
}
/**
 * Get the depth of an object key: the number of '/' characters it
 * contains, not counting a trailing '/'.
 *
 * @param key object key
 * @return depth
 */
static int fsGetObjectKeyDepth(final String key) {
  int slashes = 0;
  for (int pos = 0; pos < key.length(); pos++) {
    if (key.charAt(pos) == '/') {
      slashes++;
    }
  }
  if (key.endsWith("/")) {
    // the trailing separator only marks a folder, it adds no level
    return slashes - 1;
  }
  return slashes;
}
/**
 * Decide whether a posix object attribute describes a folder.
 *
 * @param attr posix object attribute
 * @return true if the mode is non-negative and has the directory bit set
 */
static boolean fsIsFolder(final ObsFSAttribute attr) {
  final int directoryBit = 0x004000;
  final int mode = attr.getMode();
  // A mode of -1 is reported for objects migrated from an object bucket
  // to a posix bucket; such an object is a file, not a folder.
  return mode >= 0 && (mode & directoryBit) != 0;
}
/**
 * The inner rename operation based on Posix bucket.
 *
 * <p>When the destination is an existing directory, the source is moved
 * into it under its own final path component. The rename is refused
 * (returns {@code false}) when the source is the root directory, when the
 * effective destination already exists, or when the destination is the
 * source itself or one of its descendants. Renaming a file onto itself is
 * reported as success without doing any work.
 *
 * @param owner OBS File System instance
 * @param src   source path to be renamed from
 * @param dst   destination path to be renamed to
 * @return true if the rename succeeded (or was a no-op), false otherwise
 * @throws ParentNotDirectoryException if looking up the destination
 *                                     reports a conflict and the
 *                                     destination's parent is not a
 *                                     directory
 * @throws IOException                 on IO failure.
 */
static boolean renameBasedOnPosix(final OBSFileSystem owner, final Path src,
    final Path dst) throws IOException {
  Path dstPath = dst;
  String srcKey = OBSCommonUtils.pathToKey(owner, src);
  String dstKey = OBSCommonUtils.pathToKey(owner, dstPath);

  if (srcKey.isEmpty()) {
    LOG.error("rename: src [{}] is root directory", src);
    return false;
  }

  try {
    FileStatus dstStatus = owner.getFileStatus(dstPath);
    if (dstStatus.isDirectory()) {
      String newDstString = OBSCommonUtils.maybeAddTrailingSlash(
          dstPath.toString());
      // Take the final path component directly. Deriving it with
      // srcKey.substring(parentKey.length() + 1) drops the first
      // character of the name when src sits directly under the root,
      // because the root key is empty and there is no separator to skip.
      String filename = src.getName();
      dstPath = new Path(newDstString + filename);
      dstKey = OBSCommonUtils.pathToKey(owner, dstPath);
      LOG.debug(
          "rename: dest is an existing directory and will be "
              + "changed to [{}]", dstPath);

      if (owner.exists(dstPath)) {
        LOG.error("rename: failed to rename " + src + " to "
            + dstPath
            + " because destination exists");
        return false;
      }
    } else {
      if (srcKey.equals(dstKey)) {
        LOG.warn(
            "rename: src and dest refer to the same "
                + "file or directory: {}", dstPath);
        return true;
      } else {
        LOG.error("rename: failed to rename " + src + " to "
            + dstPath
            + " because destination exists");
        return false;
      }
    }
  } catch (FileNotFoundException e) {
    // if destination does not exist, do not change the
    // destination key, and just do rename.
    LOG.debug("rename: dest [{}] does not exist", dstPath);
  } catch (FileConflictException e) {
    // the lookup of the destination conflicted: verify that its parent
    // (unless that is the root) is really a directory
    Path parent = dstPath.getParent();
    if (!OBSCommonUtils.pathToKey(owner, parent).isEmpty()) {
      FileStatus dstParentStatus = owner.getFileStatus(parent);
      if (!dstParentStatus.isDirectory()) {
        throw new ParentNotDirectoryException(
            parent + " is not a directory");
      }
    }
  }

  // refuse to move something into itself or below itself
  if (dstKey.startsWith(srcKey) && (dstKey.equals(srcKey)
      || dstKey.charAt(srcKey.length()) == Path.SEPARATOR_CHAR)) {
    LOG.error("rename: dest [{}] cannot be a descendant of src [{}]",
        dstPath, src);
    return false;
  }

  return innerFsRenameWithRetry(owner, src, dstPath, srcKey, dstKey);
}
/**
 * Rename a file in a posix bucket, retrying on IO failure up to
 * {@code OBSCommonUtils.MAX_RETRY_TIME} attempts.
 *
 * <p>A {@link FileNotFoundException} ends the loop at once: if the
 * destination exists by then, the rename is taken to have succeeded on an
 * earlier attempt; otherwise it has failed. After any other
 * {@link IOException}, the loop gives up with {@code false} when both
 * source and destination exist, and otherwise sleeps and tries again.
 *
 * @param owner  OBS File System instance
 * @param src    source path (used for existence checks and logging)
 * @param dst    destination path (used for existence checks and logging)
 * @param srcKey source object key
 * @param dstKey destination object key
 * @return true if the rename succeeded, false otherwise
 * @throws IOException if the last attempt fails, or if the thread is
 *                     interrupted while waiting to retry
 */
private static boolean innerFsRenameWithRetry(final OBSFileSystem owner,
    final Path src,
    final Path dst, final String srcKey, final String dstKey)
    throws IOException {
  boolean renameResult = true;
  int retryTime = 1;
  while (retryTime <= OBSCommonUtils.MAX_RETRY_TIME) {
    try {
      LOG.debug("rename: {}-st rename from [{}] to [{}] ...",
          retryTime, srcKey, dstKey);
      innerFsRenameFile(owner, srcKey, dstKey);
      renameResult = true;
      break;
    } catch (FileNotFoundException e) {
      if (owner.exists(dst)) {
        LOG.warn(
            "rename: successfully {}-st rename src [{}] "
                + "to dest [{}] with SDK retry",
            retryTime, src, dst, e);
        renameResult = true;
      } else {
        LOG.error(
            "rename: failed {}-st rename src [{}] to dest [{}]",
            retryTime, src, dst, e);
        renameResult = false;
      }
      break;
    } catch (IOException e) {
      if (retryTime == OBSCommonUtils.MAX_RETRY_TIME) {
        LOG.error(
            "rename: failed {}-st rename src [{}] to dest [{}]",
            retryTime, src, dst, e);
        throw e;
      } else {
        LOG.warn(
            "rename: failed {}-st rename src [{}] to dest [{}]",
            retryTime, src, dst, e);
        if (owner.exists(dst) && owner.exists(src)) {
          LOG.warn(
              "rename: failed {}-st rename src [{}] to "
                  + "dest [{}] with SDK retry", retryTime, src,
              dst, e);
          renameResult = false;
          break;
        }

        try {
          Thread.sleep(OBSCommonUtils.DELAY_TIME);
        } catch (InterruptedException ie) {
          // sleep() cleared the interrupt status; restore it so
          // callers further up the stack can still observe it.
          Thread.currentThread().interrupt();
          throw e;
        }
      }
    }

    retryTime++;
  }

  return renameResult;
}
/**
 * Used to rename a source folder to a destination folder that is not existed
 * before rename.
 *
 * @param owner OBS File System instance
 * @param src   source folder key
 * @param dst   destination folder key that not existed before rename
 * @throws IOException  any io exception; an {@code ObsException} from the
 *                      rename call is converted by
 *                      {@code OBSCommonUtils.translateException}
 * @throws ObsException any obs operation exception
 */
static void fsRenameToNewFolder(final OBSFileSystem owner, final String src,
    final String dst)
    throws IOException, ObsException {
  LOG.debug("RenameFolder path {} to {}", src, dst);

  try {
    RenameRequest renameObjectRequest = new RenameRequest();
    renameObjectRequest.setBucketName(owner.getBucket());
    renameObjectRequest.setObjectKey(src);
    renameObjectRequest.setNewObjectKey(dst);
    owner.getObsClient().renameFolder(renameObjectRequest);
    owner.getSchemeStatistics().incrementWriteOps(1);
  } catch (ObsException e) {
    // label the failure with the operation that was actually attempted
    throw OBSCommonUtils.translateException(
        "renameFolder(" + src + ", " + dst + ")", src, e);
  }
}
/**
 * Rename one file in a posix bucket in a single attempt.
 *
 * @param owner  OBS File System instance
 * @param srcKey source object key
 * @param dstKey destination object key
 * @throws FileNotFoundException if OBS answers with the not-found code
 * @throws FileConflictException if OBS answers with the conflict code
 * @throws IOException           on any other OBS failure (translated)
 */
static void innerFsRenameFile(final OBSFileSystem owner,
    final String srcKey,
    final String dstKey) throws IOException {
  LOG.debug("RenameFile path {} to {}", srcKey, dstKey);

  try {
    final RenameRequest request = new RenameRequest();
    request.setBucketName(owner.getBucket());
    request.setObjectKey(srcKey);
    request.setNewObjectKey(dstKey);
    owner.getObsClient().renameFile(request);
    owner.getSchemeStatistics().incrementWriteOps(1);
  } catch (ObsException e) {
    // map the two response codes callers care about to dedicated
    // exception types, translate everything else generically
    if (e.getResponseCode() == OBSCommonUtils.NOT_FOUND_CODE) {
      throw new FileNotFoundException(
          "No such file or directory: " + srcKey);
    } else if (e.getResponseCode() == OBSCommonUtils.CONFLICT_CODE) {
      throw new FileConflictException(
          "File conflicts during rename, " + e.getResponseStatus());
    } else {
      throw OBSCommonUtils.translateException(
          "renameFile(" + srcKey + ", " + dstKey + ")", srcKey, e);
    }
  }
}
/**
 * Rename a source object to a destination object that does not exist
 * before the rename. Both keys first pass through
 * {@code OBSCommonUtils.maybeDeleteBeginningSlash}; a source key ending
 * with "/" is renamed as a folder, any other key as a file.
 *
 * @param owner  OBS File System instance
 * @param srcKey source object key
 * @param dstKey destination object key
 * @throws IOException io exception
 */
static void fsRenameToNewObject(final OBSFileSystem owner,
    final String srcKey,
    final String dstKey) throws IOException {
  final String fromKey = OBSCommonUtils.maybeDeleteBeginningSlash(srcKey);
  final String toKey = OBSCommonUtils.maybeDeleteBeginningSlash(dstKey);
  if (!fromKey.endsWith("/")) {
    // Rename file.
    innerFsRenameFile(owner, fromKey, toKey);
    return;
  }
  // Rename folder.
  fsRenameToNewFolder(owner, fromKey, toKey);
}
/**
 * Queue a file key for deletion, issuing a batch delete once the queue
 * holds exactly the maximum number of entries per delete request.
 *
 * @param owner        OBS File System instance
 * @param sonObjectKey key of the file to delete
 * @param files        pending file keys; the new key is appended to it
 * @return the maximum batch size if a batch delete was issued, else 0
 * @throws IOException on IO failure
 */
private static int fsRemoveFile(final OBSFileSystem owner,
    final String sonObjectKey,
    final List<KeyAndVersion> files)
    throws IOException {
  files.add(new KeyAndVersion(sonObjectKey));
  if (files.size() != owner.getMaxEntriesToDelete()) {
    // batch not full yet
    return 0;
  }
  // batch delete files.
  OBSCommonUtils.removeKeys(owner, files, true, false);
  return owner.getMaxEntriesToDelete();
}
/**
 * Delete a file, or a folder that might not be empty, in a posix bucket.
 *
 * <p>Files go through {@code trashObjectIfNeed}. For folders: a root
 * delete is handed to {@code OBSCommonUtils.rejectRootDirectoryDelete};
 * a non-empty folder without {@code recursive} is rejected; an empty
 * folder is deleted directly; anything else goes through
 * {@code trashFolderIfNeed}.
 *
 * @param owner     OBS File System instance
 * @param status    status of the path to delete
 * @param recursive whether a non-empty folder may be deleted
 * @return true on success, or the result of the root-delete check
 * @throws PathIsNotEmptyDirectoryException if the folder is not empty and
 *                                          {@code recursive} is false
 * @throws IOException                      on IO failure
 * @throws ObsException                     on OBS operation failure
 */
static boolean fsDelete(final OBSFileSystem owner, final FileStatus status,
    final boolean recursive)
    throws IOException, ObsException {
  final long startTime = System.currentTimeMillis();
  final long threadId = Thread.currentThread().getId();
  final Path f = status.getPath();
  final String key = OBSCommonUtils.pathToKey(owner, f);

  if (status.isDirectory()) {
    LOG.debug("delete: Path is a directory: {} - recursive {}", f,
        recursive);
    final String dirKey = OBSCommonUtils.maybeAddTrailingSlash(key);
    final boolean isEmptyDir = OBSCommonUtils.isFolderEmpty(owner, dirKey);
    if (dirKey.isEmpty()) {
      return OBSCommonUtils.rejectRootDirectoryDelete(
          owner.getBucket(), isEmptyDir, recursive);
    }
    if (!(recursive || isEmptyDir)) {
      LOG.warn("delete: Path is not empty: {} - recursive {}", f,
          recursive);
      throw new PathIsNotEmptyDirectoryException(f.toString());
    }
    if (!isEmptyDir) {
      LOG.debug(
          "delete: Deleting objects for directory prefix {} to "
              + "delete - recursive {}", f, recursive);
      trashFolderIfNeed(owner, dirKey, f);
    } else {
      LOG.debug(
          "delete: Deleting fake empty directory {} - recursive {}",
          f, recursive);
      OBSCommonUtils.deleteObject(owner, dirKey);
    }
  } else {
    LOG.debug("delete: Path is a file");
    trashObjectIfNeed(owner, key);
  }

  final long endTime = System.currentTimeMillis();
  LOG.debug("delete Path:{} thread:{}, timeUsedInMilliSec:{}", f,
      threadId, endTime - startTime);
  return true;
}
/**
 * Delete a file, moving it into the trash directory instead when
 * {@code needToTrash} says so. If the trash location is already taken, a
 * "-yyyyMMddHHmmss" timestamp is appended to the trash key.
 *
 * @param owner OBS File System instance
 * @param key   key of the file
 * @throws ObsException on OBS operation failure
 * @throws IOException  on IO failure
 */
private static void trashObjectIfNeed(final OBSFileSystem owner,
    final String key)
    throws ObsException, IOException {
  if (!needToTrash(owner, key)) {
    OBSCommonUtils.deleteObject(owner, key);
    return;
  }

  mkTrash(owner, key);
  final StringBuilder trashKey = new StringBuilder(owner.getTrashDir());
  trashKey.append(key);
  if (owner.exists(new Path(trashKey.toString()))) {
    // avoid clobbering an earlier trashed copy
    SimpleDateFormat stampFormat = new SimpleDateFormat("-yyyyMMddHHmmss");
    trashKey.append(stampFormat.format(new Date()));
  }
  fsRenameToNewObject(owner, key, trashKey.toString());
  LOG.debug("Moved: '" + key + "' to trash at: " + trashKey.toString());
}
/**
 * Delete a non-empty folder, moving it into the trash directory instead
 * when {@code needToTrash} says so. Without trash, the folder is removed
 * either by the recursive batch delete or by the list-then-delete
 * strategy, depending on the owner's configuration.
 *
 * @param owner OBS File System instance
 * @param key   key of the folder
 * @param f     path of the folder
 * @throws ObsException on OBS operation failure
 * @throws IOException  on IO failure
 */
private static void trashFolderIfNeed(final OBSFileSystem owner,
    final String key,
    final Path f) throws ObsException, IOException {
  if (!needToTrash(owner, key)) {
    if (!owner.isEnableMultiObjectDeleteRecursion()) {
      fsNonRecursivelyDelete(owner, f);
      return;
    }
    long delNum = fsRecursivelyDeleteDir(owner, key, true);
    LOG.debug("Recursively delete {} files/dirs when deleting {}",
        delNum, key);
    return;
  }

  mkTrash(owner, key);
  final StringBuilder trashKey = new StringBuilder(owner.getTrashDir());
  trashKey.append(OBSCommonUtils.maybeAddTrailingSlash(key));
  if (owner.exists(new Path(trashKey.toString()))) {
    // slot the timestamp in front of the trailing character
    SimpleDateFormat stampFormat = new SimpleDateFormat("-yyyyMMddHHmmss");
    trashKey.insert(trashKey.length() - 1, stampFormat.format(new Date()));
  }

  final String fromKey = OBSCommonUtils.maybeDeleteBeginningSlash(key);
  final String toKey = OBSCommonUtils.maybeDeleteBeginningSlash(
      trashKey.toString());
  fsRenameToNewFolder(owner, fromKey, toKey);
  LOG.debug("Moved: '" + key + "' to trash at: " + trashKey.toString());
}
/**
 * Delete everything below a folder by listing it one level at a time
 * (delimiter "/") and descending into each sub folder before that sub
 * folder itself is queued for deletion. Files and sub folders are queued
 * in separate batches.
 *
 * @param owner        OBS File System instance
 * @param parentKey    key of the folder to empty
 * @param deleteParent whether the folder itself is deleted at the end
 * @return the number of deletions counted at this level; the count
 *         returned by nested calls on sub folders is not added
 * @throws IOException on IO failure
 */
static long fsRecursivelyDeleteDir(final OBSFileSystem owner,
    final String parentKey,
    final boolean deleteParent) throws IOException {
  long deleted = 0;
  List<KeyAndVersion> pendingSubdirs = new ArrayList<>(
      owner.getMaxEntriesToDelete());
  List<KeyAndVersion> pendingFiles = new ArrayList<>(
      owner.getMaxEntriesToDelete());

  ListObjectsRequest request = OBSCommonUtils.createListObjectsRequest(
      owner, parentKey, "/", owner.getMaxKeys());
  // walk the listing page by page until it is no longer truncated
  for (ObjectListing page = OBSCommonUtils.listObjects(owner, request);;
      page = OBSCommonUtils.continueListObjects(owner, page)) {
    for (String commonPrefix : page.getCommonPrefixes()) {
      // the prefix itself is handled by deleteParent
      if (!commonPrefix.equals(parentKey)) {
        deleted += fsRemoveSubdir(owner, commonPrefix, pendingSubdirs);
      }
    }

    for (ObsObject child : page.getObjects()) {
      String childKey = child.getObjectKey();
      if (childKey.equals(parentKey)) {
        // skip prefix itself
        continue;
      }
      if (childKey.endsWith("/")) {
        deleted += fsRemoveSubdir(owner, childKey, pendingSubdirs);
      } else {
        deleted += fsRemoveFile(owner, childKey, pendingFiles);
      }
    }

    if (!page.isTruncated()) {
      break;
    }
  }

  // flush whatever is left in the two batches, files first
  deleted += pendingFiles.size();
  OBSCommonUtils.removeKeys(owner, pendingFiles, true, false);

  deleted += pendingSubdirs.size();
  OBSCommonUtils.removeKeys(owner, pendingSubdirs, true, false);

  if (deleteParent) {
    OBSCommonUtils.deleteObject(owner, parentKey);
    deleted++;
  }

  return deleted;
}
/**
 * Decide whether deleting a key should move it to the trash directory:
 * trash must be enabled and the key (without a leading slash) must not
 * already start with the trash directory.
 *
 * @param owner OBS File System instance
 * @param key   key about to be deleted
 * @return true if the key should be moved to trash
 */
private static boolean needToTrash(final OBSFileSystem owner,
    final String key) {
  final String normalizedKey =
      OBSCommonUtils.maybeDeleteBeginningSlash(key);
  if (!owner.isEnableTrash()) {
    return false;
  }
  // objects already inside the trash are deleted for real
  return !normalizedKey.startsWith(owner.getTrashDir());
}
/**
 * Empty a sub folder recursively, then queue the sub folder's own key for
 * deletion, issuing a batch delete once the queue holds exactly the
 * maximum number of entries per delete request.
 *
 * @param owner      OBS File System instance
 * @param subdirKey  key of the sub folder
 * @param subdirList pending sub folder keys; the new key is appended
 * @return the maximum batch size if a batch delete was issued, else 0
 * @throws IOException on IO failure
 */
private static int fsRemoveSubdir(final OBSFileSystem owner,
    final String subdirKey,
    final List<KeyAndVersion> subdirList)
    throws IOException {
  // children first; the folder key itself is only queued afterwards
  fsRecursivelyDeleteDir(owner, subdirKey, false);

  subdirList.add(new KeyAndVersion(subdirKey));
  if (subdirList.size() != owner.getMaxEntriesToDelete()) {
    return 0;
  }
  // batch delete subdirs.
  OBSCommonUtils.removeKeys(owner, subdirList, true, false);
  return owner.getMaxEntriesToDelete();
}
/**
 * Make sure the parent directory of a key's trash location exists. The
 * location is the trash directory followed by the key; its parent is
 * obtained by cutting everything from the last "/" that precedes the
 * final name.
 *
 * @param owner OBS File System instance
 * @param key   key about to be moved to trash
 * @throws ObsException on OBS operation failure
 * @throws IOException  on IO failure
 */
private static void mkTrash(final OBSFileSystem owner, final String key)
    throws ObsException, IOException {
  final StringBuilder trashParent = new StringBuilder(owner.getTrashDir());
  trashParent.append(OBSCommonUtils.maybeAddTrailingSlash(key));
  // drop the trailing slash, then the last path component
  trashParent.deleteCharAt(trashParent.length() - 1);
  trashParent.delete(trashParent.lastIndexOf("/"), trashParent.length());
  final Path trashParentPath = new Path(trashParent.toString());
  // keep the parent directory of the target path exists
  if (owner.exists(trashParentPath)) {
    return;
  }
  owner.mkdirs(trashParentPath);
}
/**
 * Delete a folder by first listing all of its sub objects, then deleting
 * them in batches, and finally deleting the folder itself.
 *
 * @param owner  OBS File System instance
 * @param parent path of the folder to delete
 * @throws IOException  on IO failure
 * @throws ObsException on OBS operation failure
 */
private static void fsNonRecursivelyDelete(final OBSFileSystem owner,
    final Path parent)
    throws IOException, ObsException {
  // List sub objects sorted by path depth.
  final FileStatus[] descendants = OBSCommonUtils.innerListStatus(owner,
      parent, true);
  // Remove them one depth at a time so that a parent and its children
  // never share a batch.
  fsRemoveKeys(owner, descendants);
  // The folder should be empty by now; remove it as well.
  final String parentKey = OBSCommonUtils.pathToKey(owner, parent);
  OBSCommonUtils.deleteObject(owner, parentKey);
}
/**
 * Delete the given objects depth by depth, so that parents and children
 * are never placed in the same batch. Every key is first passed to
 * {@code OBSCommonUtils.blockRootDelete}.
 *
 * @param owner        OBS File System instance
 * @param arFileStatus statuses of the objects to delete
 * @throws ObsException on OBS operation failure
 * @throws IOException  on IO failure
 */
private static void fsRemoveKeys(final OBSFileSystem owner,
    final FileStatus[] arFileStatus)
    throws ObsException, IOException {
  if (arFileStatus.length == 0) {
    // exit fast if there are no keys to delete
    return;
  }

  for (FileStatus entry : arFileStatus) {
    OBSCommonUtils.blockRootDelete(owner.getBucket(),
        OBSCommonUtils.pathToKey(owner, entry.getPath()));
  }

  fsRemoveKeysByDepth(owner, arFileStatus);
}
/**
 * Batch delete sub objects one depth at a time, so that a parent and its
 * children never end up in the same batch. A batch deletion might be split
 * into several concurrent deletions to improve performance, and a
 * concurrent deletion cannot guarantee that an object is deleted only after
 * its children.
 *
 * <p>The array is walked from its last element to its first and is
 * expected to be ordered so that folder depth never increases during that
 * walk; otherwise an {@code ObsException} is thrown.
 *
 * @param owner        OBS File System instance
 * @param arFileStatus statuses of the objects to delete
 * @throws ObsException if the array is not sorted by path depth, or on
 *                      OBS operation failure
 * @throws IOException  on IO failure
 */
private static void fsRemoveKeysByDepth(final OBSFileSystem owner,
final FileStatus[] arFileStatus)
throws ObsException, IOException {
if (arFileStatus.length <= 0) {
// exit fast if there are no keys to delete
return;
}
// Find all leaf keys in the list.
String key;
// depth of the folders currently being collected; MAX_VALUE means that
// no folder has been seen yet
int depth = Integer.MAX_VALUE;
List<KeyAndVersion> leafKeys = new ArrayList<>(
owner.getMaxEntriesToDelete());
for (int idx = arFileStatus.length - 1; idx >= 0; idx--) {
// Flush once the batch has reached the maximum size.
// NOTE(review): this relies on removeKeys emptying leafKeys
// (presumably what the 'true' argument requests) - confirm.
if (leafKeys.size() >= owner.getMaxEntriesToDelete()) {
OBSCommonUtils.removeKeys(owner, leafKeys, true, false);
}
key = OBSCommonUtils.pathToKey(owner, arFileStatus[idx].getPath());
// Check file.
if (!arFileStatus[idx].isDirectory()) {
// A file must be a leaf.
leafKeys.add(new KeyAndVersion(key, null));
continue;
}
// Check leaf folder at current depth.
int keyDepth = fsGetObjectKeyDepth(key);
if (keyDepth == depth) {
// Any key at current depth must be a leaf.
leafKeys.add(new KeyAndVersion(key, null));
continue;
}
if (keyDepth < depth) {
// The last batch delete at current depth: everything collected so
// far is deleted before the first shallower folder is queued.
OBSCommonUtils.removeKeys(owner, leafKeys, true, false);
// Go on at the upper depth.
depth = keyDepth;
leafKeys.add(new KeyAndVersion(key, null));
continue;
}
// keyDepth > depth: a deeper folder after a shallower one means the
// input was not ordered as required.
LOG.warn(
"The objects list is invalid because it isn't sorted by"
+ " path depth.");
throw new ObsException("System failure");
}
// The last batch delete at the minimum depth of all keys.
OBSCommonUtils.removeKeys(owner, leafKeys, true, false);
}
/**
 * Create a folder in a posix bucket, retrying when the OBS call fails.
 *
 * <p>The loop makes up to {@code MAX_RETRY_TIME - 1} attempts, sleeping
 * {@code DELAY_TIME} after each failure; one final attempt is made after
 * the loop and its exception, if any, propagates to the caller.
 *
 * @param owner      OBS File System instance
 * @param objectName key of the folder to create
 * @throws ObsException if the last attempt fails, or if the thread is
 *                      interrupted while waiting to retry (the failure of
 *                      the preceding attempt is rethrown)
 */
static void fsCreateFolder(final OBSFileSystem owner,
    final String objectName)
    throws ObsException {
  for (int retryTime = 1;
      retryTime < OBSCommonUtils.MAX_RETRY_TIME; retryTime++) {
    try {
      innerFsCreateFolder(owner, objectName);
      return;
    } catch (ObsException e) {
      LOG.warn("Failed to create folder [{}], retry time [{}], "
          + "exception [{}]", objectName, retryTime, e);
      try {
        Thread.sleep(OBSCommonUtils.DELAY_TIME);
      } catch (InterruptedException ie) {
        // sleep() cleared the interrupt status; restore it so callers
        // further up the stack can still observe the interruption.
        Thread.currentThread().interrupt();
        throw e;
      }
    }
  }

  // last attempt: let any failure propagate
  innerFsCreateFolder(owner, objectName);
}
/**
 * Create a folder in a posix bucket in a single attempt and update the
 * write statistics; the length of the folder key is what gets recorded as
 * bytes written.
 *
 * @param owner      OBS File System instance
 * @param objectName key of the folder to create
 * @throws ObsException on OBS operation failure
 */
private static void innerFsCreateFolder(final OBSFileSystem owner,
    final String objectName)
    throws ObsException {
  final NewFolderRequest request = new NewFolderRequest(
      owner.getBucket(), objectName);
  request.setAcl(owner.getCannedACL());
  final long keyLength = request.getObjectKey().length();
  owner.getObsClient().newFolder(request);
  owner.getSchemeStatistics().incrementWriteOps(1);
  owner.getSchemeStatistics().incrementBytesWritten(keyLength);
}
/**
 * Get the status of a file or folder in a posix (file-gateway) bucket.
 *
 * <p>The empty key is reported as a directory without contacting OBS.
 * Any other key is resolved with a single attribute request; the mode of
 * the returned attribute decides between folder and file.
 *
 * @param owner OBS File System instance
 * @param f     path to look up
 * @return the status of the file or folder
 * @throws FileNotFoundException if OBS answers with the not-found code
 * @throws FileConflictException if OBS answers with the conflict code
 * @throws IOException           on any other OBS failure (translated)
 */
static OBSFileStatus innerFsGetObjectStatus(final OBSFileSystem owner,
    final Path f) throws IOException {
  final Path path = OBSCommonUtils.qualify(owner, f);
  String key = OBSCommonUtils.pathToKey(owner, path);
  LOG.debug("Getting path status for {} ({})", path, key);

  if (key.isEmpty()) {
    LOG.debug("Found root directory");
    return new OBSFileStatus(path, owner.getUsername());
  }

  try {
    final GetAttributeRequest getAttrRequest = new GetAttributeRequest(
        owner.getBucket(), key);
    ObsFSAttribute meta = owner.getObsClient()
        .getAttribute(getAttrRequest);
    owner.getSchemeStatistics().incrementReadOps(1);
    if (fsIsFolder(meta)) {
      LOG.debug("Found directory: {}", key);
      return new OBSFileStatus(path,
          OBSCommonUtils.dateToLong(meta.getLastModified()),
          owner.getUsername());
    } else {
      // a regular file is an ordinary outcome of this lookup
      LOG.debug("Found file: {}", key);
      return new OBSFileStatus(
          meta.getContentLength(),
          OBSCommonUtils.dateToLong(meta.getLastModified()),
          path,
          owner.getDefaultBlockSize(path),
          owner.getUsername());
    }
  } catch (ObsException e) {
    if (e.getResponseCode() == OBSCommonUtils.NOT_FOUND_CODE) {
      LOG.debug("Not Found: {}", path);
      throw new FileNotFoundException(
          "No such file or directory: " + path);
    }
    if (e.getResponseCode() == OBSCommonUtils.CONFLICT_CODE) {
      throw new FileConflictException(
          "file conflicts: " + e.getResponseStatus());
    }
    throw OBSCommonUtils.translateException("getFileStatus", path, e);
  }
}
/**
 * Build the content summary of a directory by listing everything below its
 * key prefix, page by page.
 *
 * <p>Keys ending in "/" and common prefixes are counted as directories
 * (except the directory key itself); every other object is counted as a file
 * and its content length is added to the total. The directory count starts
 * at one.
 *
 * @param owner the owning filesystem
 * @param key   key of the directory; a trailing slash is added if needed
 * @return summary with length, file count, directory count and space
 *         consumed (equal to the length)
 * @throws IOException if a listing call fails
 */
static ContentSummary fsGetDirectoryContentSummary(
    final OBSFileSystem owner,
    final String key) throws IOException {
  final String dirKey = OBSCommonUtils.maybeAddTrailingSlash(key);
  long totalLength = 0;
  long fileCount = 0;
  long dirCount = 1;
  LOG.debug("Summary key {}", dirKey);

  final ListObjectsRequest listRequest = new ListObjectsRequest();
  listRequest.setBucketName(owner.getBucket());
  listRequest.setPrefix(dirKey);
  listRequest.setMaxKeys(owner.getMaxKeys());

  ObjectListing page = OBSCommonUtils.listObjects(owner, listRequest);
  boolean morePages = true;
  while (morePages) {
    final boolean pageHasEntries = !(page.getCommonPrefixes().isEmpty()
        && page.getObjects().isEmpty());
    if (pageHasEntries) {
      if (LOG.isDebugEnabled()) {
        LOG.debug("Found path as directory (with /): {}/{}",
            page.getCommonPrefixes().size(),
            page.getObjects().size());
      }
      for (String commonPrefix : page.getCommonPrefixes()) {
        if (!commonPrefix.equals(dirKey)) {
          dirCount++;
        }
      }
      for (ObsObject entry : page.getObjects()) {
        if (entry.getObjectKey().endsWith("/")) {
          // A directory marker; skip the marker of the directory itself.
          if (!entry.getObjectKey().equals(dirKey)) {
            dirCount++;
          }
        } else {
          totalLength += entry.getMetadata().getContentLength();
          fileCount += 1;
        }
      }
    }

    // Fetch the next page only while the listing reports truncation.
    morePages = page.isTruncated();
    if (morePages) {
      page = OBSCommonUtils.continueListObjects(owner, page);
    }
  }

  LOG.debug(String.format(
      "file size [%d] - file count [%d] - directory count [%d] - "
          + "file path [%s]",
      totalLength, fileCount, dirCount, dirKey));
  return new ContentSummary.Builder().length(totalLength)
      .fileCount(fileCount).directoryCount(dirCount)
      .spaceConsumed(totalLength).build();
}
}

View File

@ -0,0 +1,310 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.fs.obs;
import org.apache.hadoop.util.Preconditions;
import com.obs.services.ObsClient;
import com.obs.services.exception.ObsException;
import com.obs.services.model.AbortMultipartUploadRequest;
import com.obs.services.model.CompleteMultipartUploadRequest;
import com.obs.services.model.CompleteMultipartUploadResult;
import com.obs.services.model.InitiateMultipartUploadRequest;
import com.obs.services.model.ObjectMetadata;
import com.obs.services.model.PartEtag;
import com.obs.services.model.PutObjectRequest;
import com.obs.services.model.PutObjectResult;
import com.obs.services.model.UploadPartRequest;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.List;
/**
* Helper for an ongoing write operation.
*
* <p>It hides direct access to the OBS API from the output stream, and is a
* location where the object upload process can be evolved/enhanced.
*
* <p>Features
*
* <ul>
* <li>Methods to create and submit requests to OBS, so avoiding all direct
* interaction with the OBS APIs.
* <li>Some extra preflight checks of arguments, so failing fast on errors.
* <li>Callbacks to let the FS know of events in the output stream upload
* process.
* </ul>
* <p>
* Each instance of this state is unique to a single output stream.
*/
class OBSWriteOperationHelper {
  /**
   * Class logger.
   */
  public static final Logger LOG = LoggerFactory.getLogger(
      OBSWriteOperationHelper.class);

  /**
   * Largest part number accepted (inclusive) when building a part upload
   * request for a multipart task.
   */
  static final int PART_NUMBER = 10000;

  /**
   * Owning filesystem.
   */
  private final OBSFileSystem owner;

  /**
   * Bucket of the owner FS.
   */
  private final String bucket;

  /**
   * Define obs client.
   */
  private final ObsClient obs;

  /**
   * Create a helper bound to a filesystem; the bucket name and the OBS
   * client are read from the filesystem once, here.
   *
   * @param fs owning filesystem
   */
  protected OBSWriteOperationHelper(final OBSFileSystem fs) {
    this.owner = fs;
    this.bucket = fs.getBucket();
    this.obs = fs.getObsClient();
  }

  /**
   * Create a {@link PutObjectRequest} request. If {@code length} is set, the
   * metadata is configured with the size of the upload.
   *
   * @param destKey     key of object
   * @param inputStream source data
   * @param length      size, if known. Use -1 for not known
   * @return the request
   */
  PutObjectRequest newPutRequest(final String destKey,
      final InputStream inputStream,
      final long length) {
    return OBSCommonUtils.newPutObjectRequest(owner, destKey,
        newObjectMetadata(length), inputStream);
  }

  /**
   * Create a {@link PutObjectRequest} request to upload a file. The metadata
   * is configured with the length of the file.
   *
   * @param destKey    object key for request
   * @param sourceFile source file
   * @return the request
   */
  PutObjectRequest newPutRequest(final String destKey,
      final File sourceFile) {
    // File.length() is a long. Pass it through unchanged: narrowing it to
    // int would report a wrong (possibly negative) size for files larger
    // than Integer.MAX_VALUE bytes.
    final long length = sourceFile.length();
    return OBSCommonUtils.newPutObjectRequest(owner, destKey,
        newObjectMetadata(length), sourceFile);
  }

  /**
   * Callback on a successful write.
   *
   * @param destKey object key
   */
  void writeSuccessful(final String destKey) {
    LOG.debug("Finished write to {}", destKey);
  }

  /**
   * Create a new object metadata instance. Any standard metadata headers are
   * added here, for example: encryption.
   *
   * @param length size, if known. Use -1 for not known
   * @return a new metadata instance
   */
  public ObjectMetadata newObjectMetadata(final long length) {
    return OBSObjectBucketUtils.newObjectMetadata(length);
  }

  /**
   * Start the multipart upload process.
   *
   * @param destKey object key
   * @return the ID of the initiated upload
   * @throws IOException IO problem
   */
  String initiateMultiPartUpload(final String destKey) throws IOException {
    LOG.debug("Initiating Multipart upload");
    final InitiateMultipartUploadRequest initiateMPURequest =
        new InitiateMultipartUploadRequest(bucket, destKey);
    initiateMPURequest.setAcl(owner.getCannedACL());
    initiateMPURequest.setMetadata(newObjectMetadata(-1));
    // At most one encryption header is attached; SSE-C is checked first.
    if (owner.getSse().isSseCEnable()) {
      initiateMPURequest.setSseCHeader(owner.getSse().getSseCHeader());
    } else if (owner.getSse().isSseKmsEnable()) {
      initiateMPURequest.setSseKmsHeader(
          owner.getSse().getSseKmsHeader());
    }
    try {
      return obs.initiateMultipartUpload(initiateMPURequest)
          .getUploadId();
    } catch (ObsException ace) {
      throw OBSCommonUtils.translateException("Initiate MultiPartUpload",
          destKey, ace);
    }
  }

  /**
   * Complete a multipart upload operation.
   *
   * @param destKey   Object key
   * @param uploadId  multipart operation Id
   * @param partETags list of partial uploads; must not be empty
   * @return the result
   * @throws ObsException on problems.
   */
  CompleteMultipartUploadResult completeMultipartUpload(
      final String destKey, final String uploadId,
      final List<PartEtag> partETags)
      throws ObsException {
    Preconditions.checkNotNull(uploadId);
    Preconditions.checkNotNull(partETags);
    Preconditions.checkArgument(!partETags.isEmpty(),
        "No partitions have been uploaded");
    LOG.debug("Completing multipart upload {} with {} parts", uploadId,
        partETags.size());
    // a copy of the list is required, so that the OBS SDK doesn't
    // attempt to sort an unmodifiable list.
    return obs.completeMultipartUpload(
        new CompleteMultipartUploadRequest(bucket, destKey, uploadId,
            new ArrayList<>(partETags)));
  }

  /**
   * Abort a multipart upload operation.
   *
   * @param destKey  object key
   * @param uploadId multipart operation Id
   * @throws ObsException on problems. Immediately execute
   */
  void abortMultipartUpload(final String destKey, final String uploadId)
      throws ObsException {
    LOG.debug("Aborting multipart upload {}", uploadId);
    obs.abortMultipartUpload(
        new AbortMultipartUploadRequest(bucket, destKey, uploadId));
  }

  /**
   * Create request for uploading one part of a multipart task.
   *
   * @param destKey    destination object key
   * @param uploadId   upload id
   * @param partNumber part number, from 1 to {@link #PART_NUMBER}
   * @param size       data size, must be positive
   * @param sourceFile source file to be uploaded
   * @return part upload request
   */
  UploadPartRequest newUploadPartRequest(
      final String destKey,
      final String uploadId,
      final int partNumber,
      final int size,
      final File sourceFile) {
    Preconditions.checkNotNull(uploadId);
    Preconditions.checkArgument(sourceFile != null, "Data source");
    Preconditions.checkArgument(size > 0, "Invalid partition size %s",
        size);
    Preconditions.checkArgument(
        partNumber > 0 && partNumber <= PART_NUMBER);
    LOG.debug("Creating part upload request for {} #{} size {}", uploadId,
        partNumber, size);
    UploadPartRequest request = new UploadPartRequest();
    request.setUploadId(uploadId);
    request.setBucketName(bucket);
    request.setObjectKey(destKey);
    request.setPartSize((long) size);
    request.setPartNumber(partNumber);
    request.setFile(sourceFile);
    if (owner.getSse().isSseCEnable()) {
      request.setSseCHeader(owner.getSse().getSseCHeader());
    }
    return request;
  }

  /**
   * Create request for uploading one part of a multipart task.
   *
   * @param destKey      destination object key
   * @param uploadId     upload id
   * @param partNumber   part number, from 1 to {@link #PART_NUMBER}
   * @param size         data size, must be positive
   * @param uploadStream upload stream for the part
   * @return part upload request
   */
  UploadPartRequest newUploadPartRequest(
      final String destKey,
      final String uploadId,
      final int partNumber,
      final int size,
      final InputStream uploadStream) {
    Preconditions.checkNotNull(uploadId);
    Preconditions.checkArgument(uploadStream != null, "Data source");
    Preconditions.checkArgument(size > 0, "Invalid partition size %s",
        size);
    Preconditions.checkArgument(
        partNumber > 0 && partNumber <= PART_NUMBER);
    LOG.debug("Creating part upload request for {} #{} size {}", uploadId,
        partNumber, size);
    UploadPartRequest request = new UploadPartRequest();
    request.setUploadId(uploadId);
    request.setBucketName(bucket);
    request.setObjectKey(destKey);
    request.setPartSize((long) size);
    request.setPartNumber(partNumber);
    request.setInput(uploadStream);
    if (owner.getSse().isSseCEnable()) {
      request.setSseCHeader(owner.getSse().getSseCHeader());
    }
    return request;
  }

  /**
   * Describe a destination as a string holding the bucket and the key.
   *
   * @param destKey object key
   * @return text of the form {@code {bucket=..., key='...'}}
   */
  public String toString(final String destKey) {
    return "{bucket=" + bucket + ", key='" + destKey + '\'' + '}';
  }

  /**
   * PUT an object directly (i.e. not via the transfer manager).
   *
   * @param putObjectRequest the request
   * @return the upload initiated
   * @throws IOException on problems
   */
  PutObjectResult putObject(final PutObjectRequest putObjectRequest)
      throws IOException {
    try {
      return OBSCommonUtils.putObjectDirect(owner, putObjectRequest);
    } catch (ObsException e) {
      throw OBSCommonUtils.translateException("put",
          putObjectRequest.getObjectKey(), e);
    }
  }
}

View File

@ -16,25 +16,42 @@
* limitations under the License.
*/
package org.apache.hadoop.fs.s3a;
package org.apache.hadoop.fs.obs;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathIOException;
/**
* Indicates the metadata associated with the given Path could not be persisted
* to the metadata store (e.g. S3Guard / DynamoDB). When this occurs, the
* file itself has been successfully written to S3, but the metadata may be out
* of sync. The metadata can be corrected with the "s3guard import" command
* provided by {@link org.apache.hadoop.fs.s3a.s3guard.S3GuardTool}.
* Exception to indicate a specific rename failure. The exit code defines the
* value returned by {@link OBSFileSystem#rename(Path, Path)}.
*/
public class MetadataPersistenceException extends PathIOException {
class RenameFailedException extends PathIOException {
/**
* Exit code to be returned.
*/
private boolean exitCode = false;
/**
* Create the exception for a failed rename. The operation name is set to
* "rename"; the target path is only set when a destination is supplied.
*
* @param src source path of the rename
* @param optionalDest destination path, may be null
* @param error error text
*/
RenameFailedException(final Path src, final Path optionalDest,
final String error) {
super(src.toString(), error);
setOperation("rename");
if (optionalDest != null) {
setTargetPath(optionalDest.toString());
}
}
/**
* Get the exit code; it is false unless changed through
* {@link #withExitCode(boolean)}.
*
* @return the exit code
*/
public boolean getExitCode() {
return exitCode;
}
/**
* Constructs a MetadataPersistenceException.
* @param path path of the affected file
* @param cause cause of the issue
* Set the exit code.
*
* @param code exit code to raise
* @return the exception
*/
public MetadataPersistenceException(String path, Throwable cause) {
super(path, cause);
public RenameFailedException withExitCode(final boolean code) {
this.exitCode = code;
return this;
}
}

View File

@ -0,0 +1,87 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.fs.obs;
import static org.apache.hadoop.fs.obs.OBSConstants.SSE_KEY;
import static org.apache.hadoop.fs.obs.OBSConstants.SSE_TYPE;
import com.obs.services.model.SseCHeader;
import com.obs.services.model.SseKmsHeader;
import org.apache.hadoop.conf.Configuration;
/**
* Wrapper for Server-Side Encryption (SSE).
*/
class SseWrapper {
  /**
   * Configuration value selecting SSE-KMS (Server-Side Encryption with Key
   * Management Service).
   */
  private static final String SSE_KMS = "sse-kms";

  /**
   * Configuration value selecting SSE-C (Server-Side Encryption with
   * Customer-Provided Encryption Keys).
   */
  private static final String SSE_C = "sse-c";

  /**
   * Header used for SSE-C; stays null unless SSE-C is configured with a key.
   */
  private SseCHeader sseCHeader;

  /**
   * Header used for SSE-KMS; stays null unless SSE-KMS is configured.
   */
  private SseKmsHeader sseKmsHeader;

  /**
   * Read the encryption type and key from the configuration and prepare the
   * matching header. Nothing is prepared when no type is configured, when
   * the type is not recognised, or when SSE-C is selected without a key.
   *
   * @param conf configuration to read
   */
  @SuppressWarnings("deprecation")
  SseWrapper(final Configuration conf) {
    final String type = conf.getTrimmed(SSE_TYPE);
    if (type == null) {
      return;
    }

    final String key = conf.getTrimmed(SSE_KEY);
    if (type.equalsIgnoreCase(SSE_C) && key != null) {
      final SseCHeader cHeader = new SseCHeader();
      cHeader.setSseCKeyBase64(key);
      cHeader.setAlgorithm(
          com.obs.services.model.ServerAlgorithm.AES256);
      sseCHeader = cHeader;
      return;
    }

    if (type.equalsIgnoreCase(SSE_KMS)) {
      final SseKmsHeader kmsHeader = new SseKmsHeader();
      kmsHeader.setEncryption(
          com.obs.services.model.ServerEncryption.OBS_KMS);
      // The key may be null here; it is handed over as-is.
      kmsHeader.setKmsKeyId(key);
      sseKmsHeader = kmsHeader;
    }
  }

  /**
   * @return true if an SSE-C header has been prepared
   */
  boolean isSseCEnable() {
    return null != sseCHeader;
  }

  /**
   * @return true if an SSE-KMS header has been prepared
   */
  boolean isSseKmsEnable() {
    return null != sseKmsHeader;
  }

  /**
   * @return the SSE-C header, or null if none was prepared
   */
  SseCHeader getSseCHeader() {
    return sseCHeader;
  }

  /**
   * @return the SSE-KMS header, or null if none was prepared
   */
  SseKmsHeader getSseKmsHeader() {
    return sseKmsHeader;
  }
}

View File

@ -0,0 +1,29 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* Package for supporting
* <a href="https://www.huaweicloud.com/en-us/product/obs.html">HuaweiCloud
* Object Storage Service (OBS)</a> as a backend filesystem in Hadoop.
* <p>
* OBS supports two kinds of buckets: object bucket and posix bucket. Posix
* bucket provides more POSIX-like semantics than object bucket, and is
* recommended for Hadoop. Object bucket is deprecated for Hadoop.
*/
package org.apache.hadoop.fs.obs;

View File

@ -0,0 +1,16 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
org.apache.hadoop.fs.obs.OBSFileSystem

View File

@ -0,0 +1,370 @@
<!---
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->
# OBSA: HuaweiCloud OBS Adapter for Hadoop Support
<!-- MACRO{toc|fromDepth=1|toDepth=3} -->
## Introduction
The `hadoop-huaweicloud` module provides support for integration with the
[HuaweiCloud Object Storage Service (OBS)](https://www.huaweicloud.com/en-us/product/obs.html).
This support comes via the JAR file `hadoop-huaweicloud.jar`.
## Features
* Read and write data stored in a HuaweiCloud OBS account.
* Reference file system paths with URLs that use the `obs` scheme.
* Present a hierarchical file system view by implementing the standard Hadoop `FileSystem` interface.
* Support multipart upload for a large file.
* Can act as a source of data in a MapReduce job, or a sink.
* Uses the HuaweiCloud OBS Java SDK, with support for the latest OBS features and authentication schemes.
* Tested for scale.
## Limitations
Partial or no support for the following operations:
* Symbolic link operations.
* Proxy users.
* File truncate.
* File concat.
* File checksum.
* File replication factor.
* Extended Attributes(XAttrs) operations.
* Snapshot operations.
* Storage policy.
* Quota.
* POSIX ACL.
* Delegation token operations.
## Getting Started
### Packages
OBSA depends upon two JARs, alongside `hadoop-common` and its dependencies.
* `hadoop-huaweicloud` JAR.
* `esdk-obs-java` JAR.
The versions of `hadoop-common` and `hadoop-huaweicloud` must be identical.
To import the libraries into a Maven build, add `hadoop-huaweicloud` JAR to the
build dependencies; it will pull in a compatible `esdk-obs-java` JAR.
The `hadoop-huaweicloud` JAR *does not* declare any dependencies other than the
one unique to it, the OBS SDK JAR. This is to simplify excluding/tuning
Hadoop dependency JARs in downstream applications. The `hadoop-client` or
`hadoop-common` dependency must be declared.
```xml
<properties>
<!-- Your exact Hadoop version here-->
<hadoop.version>3.4.0</hadoop.version>
</properties>
<dependencies>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-client</artifactId>
<version>${hadoop.version}</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-huaweicloud</artifactId>
<version>${hadoop.version}</version>
</dependency>
</dependencies>
```
### Accessing OBS URLs
Before accessing a URL, the OBS implementation classes of FileSystem/AbstractFileSystem and
the region endpoint where the bucket is located should be configured as follows:
```xml
<property>
<name>fs.obs.impl</name>
<value>org.apache.hadoop.fs.obs.OBSFileSystem</value>
<description>The OBS implementation class of the Filesystem.</description>
</property>
<property>
<name>fs.AbstractFileSystem.obs.impl</name>
<value>org.apache.hadoop.fs.obs.OBS</value>
<description>The OBS implementation class of the AbstractFileSystem.</description>
</property>
<property>
<name>fs.obs.endpoint</name>
<value>obs.region.myhuaweicloud.com</value>
<description>OBS region endpoint where a bucket is located.</description>
</property>
```
OBS URLs can then be accessed as follows:
```
obs://<bucket_name>/path
```
The scheme `obs` identifies a URL on a Hadoop-compatible file system `OBSFileSystem`
backed by HuaweiCloud OBS.
For example, the following
[FileSystem Shell](../hadoop-project-dist/hadoop-common/FileSystemShell.html)
commands demonstrate access to a bucket named `mybucket`.
```bash
hadoop fs -mkdir obs://mybucket/testDir
hadoop fs -put testFile obs://mybucket/testDir/testFile
hadoop fs -cat obs://mybucket/testDir/testFile
test file content
```
For details on how to create a bucket, see
[**Help Center > Object Storage Service > Getting Started > Basic Operation Procedure**](https://support.huaweicloud.com/intl/en-us/qs-obs/obs_qs_0003.html).
### Authenticating with OBS
Except when interacting with public OBS buckets, the OBSA client
needs credentials to interact with buckets.
The client supports multiple authentication mechanisms. The simplest authentication mechanism is
to provide the OBS access key and secret key as follows.
```xml
<property>
<name>fs.obs.access.key</name>
<description>OBS access key.
Omit for provider-based authentication.</description>
</property>
<property>
<name>fs.obs.secret.key</name>
<description>OBS secret key.
Omit for provider-based authentication.</description>
</property>
```
**Do not share access key, secret key, and session token. They must be kept secret.**
Custom implementations
of `com.obs.services.IObsCredentialsProvider` (see [**Creating an Instance of ObsClient**](https://support.huaweicloud.com/intl/en-us/sdk-java-devg-obs/en-us_topic_0142815570.html)) or
`org.apache.hadoop.fs.obs.BasicSessionCredential` may also be used for authentication.
```xml
<property>
<name>fs.obs.security.provider</name>
<description>
Class name of security provider class which implements
com.obs.services.IObsCredentialsProvider, which will
be used to construct an OBS client instance as an input parameter.
</description>
</property>
<property>
<name>fs.obs.credentials.provider</name>
<description>
Class name of credential provider class which implements
org.apache.hadoop.fs.obs.BasicSessionCredential,
which must override three APIs: getOBSAccessKeyId(),
getOBSSecretKey(), and getSessionToken().
</description>
</property>
```
## General OBSA Client Configuration
All OBSA client options are configured with properties that have the prefix `fs.obs.`.
```xml
<property>
<name>fs.obs.connection.ssl.enabled</name>
<value>false</value>
<description>Enable or disable SSL connections to OBS.</description>
</property>
<property>
<name>fs.obs.connection.maximum</name>
<value>1000</value>
<description>Maximum number of simultaneous connections to OBS.</description>
</property>
<property>
<name>fs.obs.connection.establish.timeout</name>
<value>120000</value>
<description>Socket connection setup timeout in milliseconds.</description>
</property>
<property>
<name>fs.obs.connection.timeout</name>
<value>120000</value>
<description>Socket connection timeout in milliseconds.</description>
</property>
<property>
<name>fs.obs.idle.connection.time</name>
<value>30000</value>
<description>Socket idle connection time.</description>
</property>
<property>
<name>fs.obs.max.idle.connections</name>
<value>1000</value>
<description>Maximum number of socket idle connections.</description>
</property>
<property>
<name>fs.obs.socket.send.buffer</name>
<value>256 * 1024</value>
<description>Socket send buffer to be used in OBS SDK. Represented in bytes.</description>
</property>
<property>
<name>fs.obs.socket.recv.buffer</name>
<value>256 * 1024</value>
<description>Socket receive buffer to be used in OBS SDK. Represented in bytes.</description>
</property>
<property>
<name>fs.obs.threads.keepalivetime</name>
<value>60</value>
<description>Number of seconds a thread can be idle before being
terminated in thread pool.</description>
</property>
<property>
<name>fs.obs.threads.max</name>
<value>20</value>
<description> Maximum number of concurrent active (part)uploads,
which each use a thread from thread pool.</description>
</property>
<property>
<name>fs.obs.max.total.tasks</name>
<value>20</value>
<description>Number of (part)uploads allowed to the queue before
blocking additional uploads.</description>
</property>
<property>
<name>fs.obs.delete.threads.max</name>
<value>20</value>
<description>Max number of delete threads.</description>
</property>
<property>
<name>fs.obs.multipart.size</name>
<value>104857600</value>
<description>Part size for multipart upload.
</description>
</property>
<property>
<name>fs.obs.multiobjectdelete.maximum</name>
<value>1000</value>
<description>Max number of objects in one multi-object delete call.
</description>
</property>
<property>
<name>fs.obs.fast.upload.buffer</name>
<value>disk</value>
<description>Which buffer to use. Default is `disk`, value may be
`disk` | `array` | `bytebuffer`.
</description>
</property>
<property>
<name>fs.obs.buffer.dir</name>
<value>dir1,dir2,dir3</value>
<description>Comma separated list of directories that will be used to buffer file
uploads to. This option takes effect only when the option 'fs.obs.fast.upload.buffer'
is set to 'disk'.
</description>
</property>
<property>
<name>fs.obs.fast.upload.active.blocks</name>
<value>4</value>
<description>Maximum number of blocks a single output stream can have active
(uploading, or queued to the central FileSystem instance's pool of queued
operations).
</description>
</property>
<property>
<name>fs.obs.readahead.range</name>
<value>1024 * 1024</value>
<description>Bytes to read ahead during a seek() before closing and
re-opening the OBS HTTP connection. </description>
</property>
<property>
<name>fs.obs.read.transform.enable</name>
<value>true</value>
<description>Flag indicating if socket connections can be reused by
position read. Set `false` only for HBase.</description>
</property>
<property>
<name>fs.obs.list.threads.core</name>
<value>30</value>
<description>Number of core list threads.</description>
</property>
<property>
<name>fs.obs.list.threads.max</name>
<value>60</value>
<description>Maximum number of list threads.</description>
</property>
<property>
<name>fs.obs.list.workqueue.capacity</name>
<value>1024</value>
<description>Capacity of list work queue.</description>
</property>
<property>
<name>fs.obs.list.parallel.factor</name>
<value>30</value>
<description>List parallel factor.</description>
</property>
<property>
<name>fs.obs.trash.enable</name>
<value>false</value>
<description>Switch for the fast delete.</description>
</property>
<property>
<name>fs.obs.trash.dir</name>
<description>The fast delete recycle directory.</description>
</property>
<property>
<name>fs.obs.block.size</name>
<value>128 * 1024 * 1024</value>
<description>Default block size for OBS FileSystem.
</description>
</property>
```
## Testing the hadoop-huaweicloud Module
The `hadoop-huaweicloud` module includes a full suite of unit tests.
Most of the tests will run against the HuaweiCloud OBS. To run these
tests, please create `src/test/resources/auth-keys.xml` with OBS account
information mentioned in the above sections and the following properties.
```xml
<property>
<name>fs.contract.test.fs.obs</name>
<value>obs://obsfilesystem-bucket</value>
</property>
```

View File

@ -0,0 +1,29 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/* Banner element: fixed height of 93px and no background. */
#banner {
height: 93px;
background: none;
}
/* Images inside #bannerLeft: 30px left margin and 10px top margin. */
#bannerLeft img {
margin-left: 30px;
margin-top: 10px;
}
/* Images inside #bannerRight: 17px margin on every side. */
#bannerRight img {
margin: 17px;
}

View File

@ -0,0 +1,72 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.fs.obs;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.contract.AbstractBondedFSContract;
/**
* The contract of OBS: only enabled if the test bucket is provided.
*/
public class OBSContract extends AbstractBondedFSContract {

  /**
   * Resource holding the contract options for OBS.
   */
  public static final String CONTRACT_XML = "contract/obs.xml";

  private static final String CONTRACT_ENABLE_KEY =
      "fs.obs.test.contract.enable";

  private static final boolean CONTRACT_ENABLE_DEFAULT = false;

  /**
   * Create the contract and register the OBS contract resource on it.
   *
   * @param conf configuration handed to the parent contract
   */
  public OBSContract(Configuration conf) {
    super(conf);
    // insert the base features
    addConfResource(CONTRACT_XML);
  }

  @Override
  public String getScheme() {
    return "obs";
  }

  @Override
  public Path getTestPath() {
    final Path parentTestPath = super.getTestPath();
    return OBSTestUtils.createTestPath(parentTestPath);
  }

  /**
   * Check whether a test filesystem has been named in a freshly loaded
   * contract configuration.
   *
   * @return false if the test filesystem option is missing or blank,
   *         true otherwise
   */
  public static synchronized boolean isContractTestEnabled() {
    final String fileSystem =
        getConfiguration().get(OBSTestConstants.TEST_FS_OBS_NAME);
    return fileSystem != null && !fileSystem.trim().isEmpty();
  }

  /**
   * Build a new configuration with the OBS contract resource added.
   *
   * @return the new configuration
   */
  public static synchronized Configuration getConfiguration() {
    final Configuration contractConf = new Configuration();
    contractConf.addResource(CONTRACT_XML);
    return contractConf;
  }
}

View File

@ -16,19 +16,25 @@
* limitations under the License.
*/
package org.apache.hadoop.fs.s3a.s3guard;
import org.apache.hadoop.fs.PathIOException;
package org.apache.hadoop.fs.obs;
/**
* An exception raised when a table being deleted is still present after
* the wait time is exceeded.
* Constants for OBS Testing.
*/
public class TableDeleteTimeoutException extends PathIOException {
TableDeleteTimeoutException(final String path,
final String error,
final Throwable cause) {
super(path, error, cause);
final class OBSTestConstants {

  /**
   * Configuration key naming the filesystem to test against.
   */
  static final String TEST_FS_OBS_NAME = "fs.contract.test.fs.obs";

  /**
   * Key of the fork ID passed down from maven if the test is running in
   * parallel.
   */
  static final String TEST_UNIQUE_FORK_ID = "test.unique.fork.id";

  /**
   * Constants holder; never instantiated.
   */
  private OBSTestConstants() {
  }
}

View File

@ -0,0 +1,119 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.fs.obs;
import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.junit.internal.AssumptionViolatedException;
import java.io.IOException;
import java.net.URI;
import static org.apache.hadoop.fs.obs.OBSTestConstants.*;
import static org.apache.hadoop.fs.obs.OBSConstants.*;
/**
* Utilities for the OBS tests.
*/
public final class OBSTestUtils {
/**
* Create the test filesystem.
* <p>
* If the test.fs.obs.name property is not set, this will trigger a JUnit
* failure.
* <p>
* Multipart purging is enabled.
*
* @param conf configuration
* @return the FS
* @throws IOException IO Problems
* @throws AssumptionViolatedException if the FS is not named
*/
public static OBSFileSystem createTestFileSystem(Configuration conf)
throws IOException {
return createTestFileSystem(conf, false);
}
/**
* Create the test filesystem with or without multipart purging
* <p>
* If the test.fs.obs.name property is not set, this will trigger a JUnit
* failure.
*
* @param conf configuration
* @param purge flag to enable Multipart purging
* @return the FS
* @throws IOException IO Problems
* @throws AssumptionViolatedException if the FS is not named
*/
@SuppressWarnings("deprecation")
public static OBSFileSystem createTestFileSystem(Configuration conf,
boolean purge)
throws IOException {
String fsname = conf.getTrimmed(TEST_FS_OBS_NAME, "");
boolean liveTest = !StringUtils.isEmpty(fsname);
URI testURI = null;
if (liveTest) {
testURI = URI.create(fsname);
liveTest = testURI.getScheme().equals(OBSConstants.OBS_SCHEME);
}
if (!liveTest) {
// This doesn't work with our JUnit 3 style test cases, so instead we'll
// make this whole class not run by default
throw new AssumptionViolatedException(
"No test filesystem in " + TEST_FS_OBS_NAME);
}
OBSFileSystem fs1 = new OBSFileSystem();
//enable purging in tests
if (purge) {
conf.setBoolean(PURGE_EXISTING_MULTIPART, true);
// but a long delay so that parallel multipart tests don't
// suddenly start timing out
conf.setInt(PURGE_EXISTING_MULTIPART_AGE, 30 * 60);
}
fs1.initialize(testURI, conf);
return fs1;
}
/**
* Create a test path, using the value of
* {@link OBSTestConstants#TEST_UNIQUE_FORK_ID}
* if it is set.
*
* @param defVal default value
* @return a path
*/
public static Path createTestPath(Path defVal) {
String testUniqueForkId = System.getProperty(
OBSTestConstants.TEST_UNIQUE_FORK_ID);
return testUniqueForkId == null ? defVal :
new Path("/" + testUniqueForkId, "test");
}
/**
* This class should not be instantiated.
*/
private OBSTestUtils() {
}
}

View File

@ -0,0 +1,40 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.fs.obs;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.contract.AbstractContractAppendTest;
import org.apache.hadoop.fs.contract.AbstractFSContract;
import org.junit.Assume;
/**
 * Runs the inherited append contract tests against the OBS file system.
 */
public class TestOBSContractAppend extends AbstractContractAppendTest {

  /**
   * Supply the OBS contract used by the inherited append tests.
   *
   * @param conf configuration handed to the contract
   * @return a new {@link OBSContract}
   */
  @Override
  protected AbstractFSContract createContract(final Configuration conf) {
    final AbstractFSContract obsContract = new OBSContract(conf);
    return obsContract;
  }

  /**
   * Skipped unconditionally: the assumption is hard-coded to fail.
   */
  @Override
  public void testRenameFileBeingAppended() {
    final boolean supported = false;
    Assume.assumeTrue("unsupport.", supported);
  }
}

View File

@ -1,4 +1,4 @@
/**
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
@ -15,33 +15,31 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hdfs;
package org.apache.hadoop.fs.obs;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hdfs.client.HdfsClientConfigKeys;
import org.apache.hadoop.fs.contract.AbstractContractCreateTest;
import org.apache.hadoop.fs.contract.AbstractFSContract;
import org.junit.Assume;
/**
* End-to-end tests for COMPOSITE_CRC combine mode.
* Create test cases on obs file system.
*/
// NOTE(review): the lines below appear to interleave the removed
// TestFileChecksumCompositeCrc source with the added TestOBSContractCreate
// source, as rendered by the diff view; they do not form one compilable
// class as shown. Confirm against the real file before editing.
public class TestFileChecksumCompositeCrc extends TestFileChecksum {
public class TestOBSContractCreate extends AbstractContractCreateTest {
@Override
protected void customizeConf(Configuration conf) {
conf.set(
HdfsClientConfigKeys.DFS_CHECKSUM_COMBINE_MODE_KEY, "COMPOSITE_CRC");
// Returns a new OBSContract built from the supplied configuration.
protected AbstractFSContract createContract(final Configuration conf) {
return new OBSContract(conf);
}
@Override
protected boolean expectComparableStripedAndReplicatedFiles() {
return true;
// Skipped unconditionally: the assumption below is hard-coded to false.
public void testCreatedFileIsImmediatelyVisible() {
Assume.assumeTrue("unsupport.", false);
}
@Override
protected boolean expectComparableDifferentBlockSizeReplicatedFiles() {
return true;
}
@Override
protected boolean expectSupportForSingleFileMixedBytesPerChecksum() {
return true;
// Skipped unconditionally: the assumption below is hard-coded to false.
public void testCreatedFileIsVisibleOnFlush() {
Assume.assumeTrue("unsupport", false);
}
}

View File

@ -0,0 +1,34 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.fs.obs;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.contract.AbstractContractDeleteTest;
import org.apache.hadoop.fs.contract.AbstractFSContract;
/**
 * Runs the inherited delete contract tests against the OBS file system.
 */
public class TestOBSContractDelete extends AbstractContractDeleteTest {

  /**
   * Supply the OBS contract used by the inherited delete tests.
   *
   * @param conf configuration handed to the contract
   * @return a new {@link OBSContract}
   */
  @Override
  protected AbstractFSContract createContract(final Configuration conf) {
    final AbstractFSContract obsContract = new OBSContract(conf);
    return obsContract;
  }
}

View File

@ -0,0 +1,36 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.fs.obs;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.contract.AbstractContractGetFileStatusTest;
import org.apache.hadoop.fs.contract.AbstractFSContract;
/**
 * Runs the inherited get-file-status contract tests against the OBS file
 * system.
 */
public class TestOBSContractGetFileStatus
    extends AbstractContractGetFileStatusTest {

  /**
   * Supply the OBS contract used by the inherited file status tests.
   *
   * @param conf configuration handed to the contract
   * @return a new {@link OBSContract}
   */
  @Override
  protected AbstractFSContract createContract(final Configuration conf) {
    final AbstractFSContract obsContract = new OBSContract(conf);
    return obsContract;
  }
}

View File

@ -0,0 +1,34 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.fs.obs;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.contract.AbstractContractMkdirTest;
import org.apache.hadoop.fs.contract.AbstractFSContract;
/**
 * Runs the inherited mkdir contract tests against the OBS file system.
 */
public class TestOBSContractMkdir extends AbstractContractMkdirTest {

  /**
   * Supply the OBS contract used by the inherited mkdir tests.
   *
   * @param conf configuration handed to the contract
   * @return a new {@link OBSContract}
   */
  @Override
  protected AbstractFSContract createContract(final Configuration conf) {
    final AbstractFSContract obsContract = new OBSContract(conf);
    return obsContract;
  }
}

View File

@ -0,0 +1,34 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.fs.obs;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.contract.AbstractContractOpenTest;
import org.apache.hadoop.fs.contract.AbstractFSContract;
/**
 * Runs the inherited open contract tests against the OBS file system.
 */
public class TestOBSContractOpen extends AbstractContractOpenTest {

  /**
   * Supply the OBS contract used by the inherited open tests.
   *
   * @param conf configuration handed to the contract
   * @return a new {@link OBSContract}
   */
  @Override
  protected AbstractFSContract createContract(final Configuration conf) {
    final AbstractFSContract obsContract = new OBSContract(conf);
    return obsContract;
  }
}

View File

@ -0,0 +1,45 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.fs.obs;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.contract.AbstractContractRenameTest;
import org.apache.hadoop.fs.contract.AbstractFSContract;
import org.junit.Assume;
/**
 * Runs the inherited rename contract tests against the OBS file system.
 */
public class TestOBSContractRename extends AbstractContractRenameTest {

  /**
   * Supply the OBS contract used by the inherited rename tests.
   *
   * @param conf configuration handed to the contract
   * @return a new {@link OBSContract}
   */
  @Override
  protected AbstractFSContract createContract(final Configuration conf) {
    final AbstractFSContract obsContract = new OBSContract(conf);
    return obsContract;
  }

  /**
   * Skipped unconditionally: the assumption is hard-coded to fail.
   */
  @Override
  public void testRenameFileUnderFileSubdir() {
    final boolean supported = false;
    Assume.assumeTrue("unsupport.", supported);
  }

  /**
   * Skipped unconditionally: the assumption is hard-coded to fail.
   */
  @Override
  public void testRenameFileUnderFile() {
    final boolean supported = false;
    Assume.assumeTrue("unsupport.", supported);
  }
}

View File

@ -0,0 +1,34 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.fs.obs;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.contract.AbstractContractRootDirectoryTest;
import org.apache.hadoop.fs.contract.AbstractFSContract;
/**
 * Runs the inherited root directory contract tests against the OBS file
 * system.
 */
public class TestOBSContractRootDir
    extends AbstractContractRootDirectoryTest {

  /**
   * Supply the OBS contract used by the inherited root directory tests.
   *
   * @param conf configuration handed to the contract
   * @return a new {@link OBSContract}
   */
  @Override
  protected AbstractFSContract createContract(final Configuration conf) {
    final AbstractFSContract obsContract = new OBSContract(conf);
    return obsContract;
  }
}

View File

@ -0,0 +1,34 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.fs.obs;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.contract.AbstractContractSeekTest;
import org.apache.hadoop.fs.contract.AbstractFSContract;
/**
 * Runs the inherited seek contract tests against the OBS file system.
 */
public class TestOBSContractSeek extends AbstractContractSeekTest {

  /**
   * Supply the OBS contract used by the inherited seek tests.
   *
   * @param conf configuration handed to the contract
   * @return a new {@link OBSContract}
   */
  @Override
  protected AbstractFSContract createContract(final Configuration conf) {
    final AbstractFSContract obsContract = new OBSContract(conf);
    return obsContract;
  }
}

View File

@ -0,0 +1,93 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.fs.obs;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.TestFSMainOperationsLocalFileSystem;
import org.junit.After;
import org.junit.Assume;
import org.junit.Before;
/**
 * Runs the {@link FileSystem} main-operation tests inherited from
 * {@link TestFSMainOperationsLocalFileSystem} against an OBS file system.
 * <p>
 * {@link #setUp()} replaces the inherited setup: it skips the test unless
 * the OBS contract tests are enabled, then points the <code>fSys</code>
 * instance variable at a file system created from the contract resource.
 * <p>
 * Several inherited cases are overridden so that they are always skipped.
 */
public class TestOBSFSMainOperations extends
    TestFSMainOperationsLocalFileSystem {

  /**
   * Skip the running test unless the OBS contract tests are enabled.
   */
  public void skipTestCheck() {
    final boolean enabled = OBSContract.isContractTestEnabled();
    Assume.assumeTrue(enabled);
  }

  /**
   * Create the OBS file system under test.
   *
   * @throws Exception if the file system cannot be created
   */
  @Override
  @Before
  public void setUp() throws Exception {
    skipTestCheck();
    final Configuration obsConf = new Configuration();
    obsConf.addResource(OBSContract.CONTRACT_XML);
    fSys = OBSTestUtils.createTestFileSystem(obsConf);
  }

  /**
   * Run the inherited teardown, but only when a file system was created.
   *
   * @throws Exception if the inherited teardown fails
   */
  @Override
  @After
  public void tearDown() throws Exception {
    if (fSys == null) {
      // setUp was skipped or failed before assigning fSys
      return;
    }
    super.tearDown();
  }

  /**
   * Skipped unconditionally: the assumption is hard-coded to fail.
   */
  @Override
  public void testWorkingDirectory() {
    final boolean supported = false;
    Assume.assumeTrue("unspport.", supported);
  }

  /**
   * Skipped unconditionally: the assumption is hard-coded to fail.
   */
  @Override
  public void testListStatusThrowsExceptionForUnreadableDir() {
    final boolean supported = false;
    Assume.assumeTrue("unspport.", supported);
  }

  /**
   * Skipped unconditionally: the assumption is hard-coded to fail.
   */
  @Override
  public void testRenameDirectoryToItself() {
    final boolean supported = false;
    Assume.assumeTrue("unspport.", supported);
  }

  /**
   * Skipped unconditionally: the assumption is hard-coded to fail.
   */
  @Override
  public void testGlobStatusThrowsExceptionForUnreadableDir() {
    final boolean supported = false;
    Assume.assumeTrue("unspport.", supported);
  }

  /**
   * Skipped unconditionally: the assumption is hard-coded to fail.
   */
  @Override
  public void testRenameFileToItself() {
    final boolean supported = false;
    Assume.assumeTrue("unspport.", supported);
  }
}

View File

@ -0,0 +1,75 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.fs.obs;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.DelegateToFileSystem;
import org.apache.hadoop.fs.FileContext;
import org.apache.hadoop.fs.FileContextCreateMkdirBaseTest;
import org.apache.hadoop.fs.FileContextTestHelper;
import org.apache.hadoop.fs.FileSystem;
import org.junit.Assume;
import org.junit.BeforeClass;
import java.net.URI;
import java.util.UUID;
import edu.umd.cs.findbugs.annotations.SuppressFBWarnings;
/**
 * File context create mkdir test cases on obs file system.
 */
public class TestOBSFileContextCreateMkdir extends
    FileContextCreateMkdirBaseTest {

  /**
   * Skip the whole class unless the OBS contract tests are enabled.
   */
  @BeforeClass
  public static void skipTestCheck() {
    Assume.assumeTrue(OBSContract.isContractTestEnabled());
  }

  /**
   * Make sure a file context backed by the OBS test file system exists,
   * then run the inherited setup.
   * <p>
   * The file system is only created when no file context exists yet, so an
   * existing context does not cause a further, unused and never-closed
   * file system to be initialized on every test.
   *
   * @throws Exception if the test file system is not configured or the
   *                   file context cannot be created
   */
  @SuppressFBWarnings("ST_WRITE_TO_STATIC_FROM_INSTANCE_METHOD")
  @Override
  public void setUp() throws Exception {
    // NOTE(review): fc looks like a static field of the base class (hence
    // the FindBugs suppression above), so it survives across test
    // instances; confirm against FileContextCreateMkdirBaseTest.
    if (fc == null) {
      this.fc = createFileContext();
    }
    super.setUp();
  }

  /**
   * Build a file context that delegates to a newly created OBS test file
   * system.
   *
   * @return the file context
   * @throws Exception if the test file system is not configured or cannot
   *                   be created
   */
  private FileContext createFileContext() throws Exception {
    Configuration conf = OBSContract.getConfiguration();
    conf.addResource(OBSContract.CONTRACT_XML);
    String fileSystem = conf.get(OBSTestConstants.TEST_FS_OBS_NAME);
    if (fileSystem == null || fileSystem.trim().length() == 0) {
      throw new Exception("Default file system not configured.");
    }

    URI uri = new URI(fileSystem);
    FileSystem fs = OBSTestUtils.createTestFileSystem(conf);
    return FileContext.getFileContext(new DelegateToFileSystem(uri, fs,
        conf, fs.getScheme(), false) {
    }, conf);
  }

  /**
   * Use a random UUID as the test root so that no local directory path
   * ends up in the OBS path.
   *
   * @return the helper used to build test paths
   */
  @Override
  protected FileContextTestHelper createFileContextHelper() {
    // On Windows, root directory path is created from local running
    // directory.
    // obs does not support ':' as part of the path which results in
    // failure.
    return new FileContextTestHelper(UUID.randomUUID().toString());
  }
}

Some files were not shown because too many files have changed in this diff Show More