HADOOP-17913. Filter deps with release labels (#3437)

This commit is contained in:
Gautham B A 2021-09-16 21:48:58 +05:30 committed by GitHub
parent 4d21655d04
commit 16ca362564
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 89 additions and 12 deletions

View File

@ -26,9 +26,11 @@ the other. Different platforms have different toolchains. Some packages tend to
across platforms and most commonly, a package that's readily available in one platform's toolchain across platforms and most commonly, a package that's readily available in one platform's toolchain
isn't available on another. We thus, resort to building and installing the package from source, isn't available on another. We thus, resort to building and installing the package from source,
causing duplication of code since this needs to be done for all the Dockerfiles pertaining to all causing duplication of code since this needs to be done for all the Dockerfiles pertaining to all
the platforms. We need a system to track a dependency - for a package - for a platform. Thus, the platforms. We need a system to track a dependency - for a package - for a platform
there's a lot of diversity that needs to be handled for managing package dependencies and
`pkg-resolver` caters to that. - (and optionally) for a release. Thus, there's a lot of diversity that needs to be handled for
managing package dependencies and
`pkg-resolver` caters to that.
## Supported platforms ## Supported platforms
@ -53,6 +55,21 @@ there's a lot of diversity that needs to be handled for managing package depende
"package_2", "package_2",
"package_3" "package_3"
] ]
},
"dependency_3": {
"platform_1": {
"release_1": "package_1_1_1",
"release_2": [
"package_1_2_1",
"package_1_2_2"
]
},
"platform_2": [
"package_2_1",
{
"release_1": "package_2_1_1"
}
]
} }
} }
``` ```
@ -65,6 +82,29 @@ how to interpret the above JSON -
2. For `dependency_2`, `package_1` and `package_2` needs to be installed for `platform_2`. 2. For `dependency_2`, `package_1` and `package_2` needs to be installed for `platform_2`.
3. For `dependency_2`, `package_1`, `package_2` and `package_3` need to be installed for 3. For `dependency_2`, `package_1`, `package_2` and `package_3` need to be installed for
`platform_1`. `platform_1`.
4. For `dependency_3`, `package_1_1_1` gets installed only if `release_1` has been specified
for `platform_1`.
5. For `dependency_3`, the packages `package_1_2_1` and `package_1_2_2` get installed only
if `release_2` has been specified for `platform_1`.
6. For `dependency_3`, for `platform_2`, `package_2_1` is installed when no release has been
specified, whereas `package_2_1_1` gets installed only if `release_1` has been specified.
### Tool help
```shell
$ pkg-resolver/resolve.py -h
usage: resolve.py [-h] [-r RELEASE] platform
Platform package dependency resolver for building Apache Hadoop
positional arguments:
platform The name of the platform to resolve the dependencies for
optional arguments:
-h, --help show this help message and exit
-r RELEASE, --release RELEASE
The release label to filter the packages for the given platform
```
## Standalone packages ## Standalone packages

View File

@ -20,26 +20,55 @@
Platform package dependency resolver for building Apache Hadoop. Platform package dependency resolver for building Apache Hadoop.
""" """
import argparse
import json import json
import sys import sys
from check_platform import is_supported_platform from check_platform import is_supported_platform
def get_packages(platform, release=None):
    """
    Resolve and get the list of packages to install for the given platform.

    The dependency table is read from ``pkg-resolver/packages.json``, relative to the current
    working directory. Each dependency maps a platform name to a package specification, which
    may be a package name (string), a list of specifications, or an object that maps release
    labels to further specifications.

    When no release is given, only the packages that are not nested under a release label are
    returned. When a release is given, only the packages nested under that release label are
    returned.

    :param platform: The platform for which the packages needs to be resolved.
    :param release: An optional parameter that filters the packages of the given platform for the
                    specified release.
    :return: A list of resolved packages to install.
    :raises TypeError: If the package specification contains a value that is neither a string,
                       a list nor an object.
    """
    with open('pkg-resolver/packages.json', encoding='utf-8', mode='r') as pkg_file:
        pkgs = json.load(pkg_file)
    packages = []

    def process_package(package, in_release=False):
        """
        Processes the given package object that belongs to a platform and adds it to the packages
        list variable in the parent scope.
        In essence, this method recursively traverses the JSON structure defined in packages.json
        and performs the core filtering.

        :param package: The package object to process.
        :param in_release: A boolean that indicates whether the current traversal belongs to a
                           package that needs to be filtered for the given release label.
        """
        if isinstance(package, list):
            for entry in package:
                process_package(entry, in_release)
        elif isinstance(package, dict):
            # The keys of an object are release labels, so nothing in it applies when no
            # release label was requested.
            if release is None:
                return
            if release in package:
                # Hand the value over as-is instead of iterating it here. A release label may
                # map to a single package name, and iterating a string would add every
                # character of that name as a separate package.
                process_package(package[release], in_release=True)
        elif isinstance(package, str):
            # Filter out the package that doesn't belong to this release,
            # if a release label has been specified.
            if release is not None and not in_release:
                return
            packages.append(package)
        else:
            raise TypeError('Unknown package of type: {}'.format(type(package)))

    for platforms in pkgs.values():
        platform_packages = platforms.get(platform)
        # Dependencies that declare nothing for this platform are skipped.
        if platform_packages is not None:
            process_package(platform_packages)
    return packages
@ -49,13 +78,21 @@ if __name__ == '__main__':
file=sys.stderr) file=sys.stderr)
sys.exit(1) sys.exit(1)
# Parse the command line: a mandatory platform name and an optional release label.
# nargs is left at its default on purpose so that each value is parsed as a plain string;
# nargs=1 would wrap the value in a one-element list, which then shows up as ['name'] when
# it is formatted into the error message below.
arg_parser = argparse.ArgumentParser(
    description='Platform package dependency resolver for building Apache Hadoop')
arg_parser.add_argument('-r', '--release', type=str,
                        help='The release label to filter the packages for the given platform')
arg_parser.add_argument('platform', type=str,
                        help='The name of the platform to resolve the dependencies for')
args = arg_parser.parse_args()

if not is_supported_platform(args.platform):
    print(
        'ERROR: The given platform {} is not supported. '
        'Please refer to platforms.json for a list of supported platforms'.format(
            args.platform), file=sys.stderr)
    sys.exit(1)

# args.release is None when -r/--release was not passed, which matches the default of
# get_packages.
packages_to_install = get_packages(args.platform, args.release)
print(' '.join(packages_to_install))