Build: Switch ml snapshot dependency to a local project (elastic/x-pack-elasticsearch#1559)

This commit adds an internal project called ml-cpp-snapshot which, when
built, will pull the ml-cpp zip file from the prelert bucket. The GET
request has retries added to handle the eventual consistency of the
dynamically generated AWS credentials.

Original commit: elastic/x-pack-elasticsearch@1bba7d0f08
This commit is contained in:
Ryan Ernst 2017-05-26 01:15:12 -07:00 committed by GitHub
parent 4ecd1e5d50
commit bb71839b85
2 changed files with 164 additions and 158 deletions

View File

@ -1,12 +1,3 @@
import com.amazonaws.AmazonServiceException
import com.amazonaws.ClientConfiguration
import com.amazonaws.auth.AWSCredentials
import com.amazonaws.auth.BasicAWSCredentials
import com.bettercloud.vault.Vault
import com.bettercloud.vault.VaultConfig
import com.bettercloud.vault.response.LogicalResponse
import org.elasticsearch.gradle.MavenFilteringHack
import org.elasticsearch.gradle.test.NodeInfo
@ -14,8 +5,6 @@ import java.nio.charset.StandardCharsets
import java.nio.file.Files
import java.nio.file.Path
import java.nio.file.StandardCopyOption
import java.nio.file.attribute.PosixFilePermission
import java.nio.file.attribute.PosixFilePermissions
group 'org.elasticsearch.plugin'
@ -30,140 +19,6 @@ esplugin {
archivesBaseName = 'x-pack' // for api jar
buildscript {
repositories {
dependencies {
classpath group: 'com.bettercloud', name: 'vault-java-driver', version:"1.1.0"
classpath 'com.amazonaws:aws-java-sdk-s3:1.10.33'
Closure setAwsCreds = {
* The Elastic Secrets vault is served via HTTPS with a Let's Encrypt certificate. The root certificates that cross-signed the Let's
* Encrypt certificates were not trusted by the JDK until 8u101. Therefore, we enforce that the JDK is at least 8u101 here.
final String javaVersion = System.getProperty('java.version')
final String javaVendor = System.getProperty('java.vendor')
def matcher = javaVersion =~ /1\.8\.0(?:_(\d+))?/
boolean matches = matcher.matches()
assert matches
final int update
if ( == null) {
update = 0
} else {
update =
if (update < 101) {
throw new GradleException("JDK ${javaVendor} ${javaVersion} does not have necessary root certificates " +
"(, update your JDK to at least JDK 8u101+")
// get an authentication token with vault
String homePath =['user.home']
File githubToken = file("${homePath}/.elastic/github.token")
final String VAULT_URL = ''
final String VAULT_ROLE_ID = "8e90dd88-5a8e-9c12-0da9-5439f293ff97"
final String VAULT_SECRET_ID = System.env.VAULT_SECRET_ID
String authBody = null
URL vaultUrl = null
if (githubToken.exists()) {
try {
Set<PosixFilePermission> perms = Files.getPosixFilePermissions(githubToken.toPath())
if (perms.equals(PosixFilePermissions.fromString("rw-------")) == false) {
throw new GradleException('github.token must have 600 permissions')
} catch (UnsupportedOperationException e) {
// Assume this isn't a POSIX file system
vaultUrl = new URL(VAULT_URL + '/v1/auth/github/login')
authBody = "{\"token\": \"${githubToken.getText('UTF-8').trim()}\"}"
} else if (VAULT_SECRET_ID != null) {
vaultUrl = new URL(VAULT_URL + '/v1/auth/approle/login')
authBody = "{\"role_id\": \"${VAULT_ROLE_ID}\", \"secret_id\": \"${VAULT_SECRET_ID}\"}"
} else {
throw new GradleException('Missing ~/.elastic/github.token file or VAULT_SECRET_ID environment variable, needed to authenticate with vault for secrets')
HttpURLConnection vaultConn = (HttpURLConnection) vaultUrl.openConnection()
vaultConn.setRequestProperty('Content-Type', 'application/json')
vaultConn.outputStream.withWriter('UTF-8') { writer ->
Object authResponse = new groovy.json.JsonSlurper().parseText(vaultConn.content.text)
VaultConfig config = new VaultConfig(VAULT_URL, authResponse.auth.client_token)
Vault vault = new Vault(config)
LogicalResponse secret = vault.logical().read("aws-dev/creds/prelertartifacts")
project.ext.mlAwsAccessKey ='access_key')
project.ext.mlAwsSecretKey ='secret_key')
// Retry for up to 1 minute to give AWS a chance to propagate the credentials
int retries = 120
while (retries > 0) {
AWSCredentials creds = new BasicAWSCredentials(project.mlAwsAccessKey, project.mlAwsSecretKey)
ClientConfiguration clientConfiguration = new ClientConfiguration()
// the response metadata cache is only there for diagnostics purposes,
// but can force objects from every response to the old generation.
AmazonS3Client client = new AmazonS3Client(creds, clientConfiguration)
try {
client.headBucket(new HeadBucketRequest('prelert-artifacts'))
} catch (AmazonServiceException e) {
if (e.getStatusCode() != 403 || retries == 0) {
throw new GradleException('Could not access ml-cpp artifacts. Timed out after 60 seconds', e)
gradle.taskGraph.whenReady { taskGraph ->
// Vault auth to get keys for access to cpp artifacts
if (taskGraph.hasTask(bundlePlugin)) {
if (project.hasProperty("mlAwsAccessKey") == false && project.hasProperty("mlAwsSecretKey") == false) {
if (project.gradle.startParameter.isOffline()) {
// if the project is offline, then we shouldn't try to contact AWS and instead can just
// use a cached, but possibly expired artifact so that the build works...
project.ext.mlAwsAccessKey = ""
project.ext.mlAwsSecretKey = ""
} else {
repositories {
maven {
url "s3://prelert-artifacts/maven"
credentials(AwsCredentials) {
accessKey "${project.mlAwsAccessKey}"
secretKey "${project.mlAwsSecretKey}"
// For some unknown reason, this from statement triggers
// a resolve of the nativeBundle configuration. It should
// be delayed until the bundlePlugin task is executed, but
// it is not. So for now, we add the extra files to bundlePlugin
// here right after configuring the prelert maven, which
// will only happen when bundlePlugin will be run anyways.
project.bundlePlugin.from {
project.extractNativeLicenses.from {
// TODO: fix this!
ext.compactProfile = 'full'
@ -188,19 +43,14 @@ licenseHeaders {
configurations {
nativeBundle {
// Check for new native code on every build, otherwise builds can
// fail for 24 hours after a change to the C++ output format
resolutionStrategy.cacheChangingModulesFor 0, 'seconds'
if (findProject(':machine-learning-cpp') != null) {
configurations.nativeBundle {
resolutionStrategy.dependencySubstitution {
substitute module("") with project(":machine-learning-cpp")
if (findProject(':machine-learning-cpp') != null) {
substitute module("") with project(":machine-learning-cpp")
} else {
substitute module("") with project("${project.path}:ml-cpp-snapshot")
bundlePlugin.dependsOn ':machine-learning-cpp:buildUberZip'
dependencies {
@ -229,9 +79,7 @@ dependencies {
// ml deps
compile 'net.sf.supercsv:super-csv:2.4.0'
nativeBundle ("${project.version}@zip") {
changing = true
nativeBundle "${project.version}@zip"
testCompile 'org.ini4j:ini4j:0.5.2'
// common test deps
@ -277,7 +125,11 @@ forbiddenPatterns {
task extractNativeLicenses(type: Copy) {
dependsOn configurations.nativeBundle
into "${buildDir}"
from {
include 'platform/licenses/**'
// This is to reduce the risk of credentials used to access the native bundle not
// having propagated throughout AWS by the time it's downloaded; the time needed
@ -287,12 +139,16 @@ task extractNativeLicenses(type: Copy) {
// TODO: standardize packaging config for plugins
bundlePlugin {
dependsOn configurations.nativeBundle
from('bin/x-pack') {
into 'bin'
from('config/x-pack') {
into 'config'
from {
// We don't ship the individual nativeBundle licenses - instead
// they get combined into the top level NOTICES file we ship
exclude 'platform/licenses/**'

View File

@ -0,0 +1,150 @@
import com.amazonaws.AmazonServiceException
import com.amazonaws.ClientConfiguration
import com.amazonaws.auth.AWSCredentials
import com.amazonaws.auth.BasicAWSCredentials
import com.bettercloud.vault.Vault
import com.bettercloud.vault.VaultConfig
import com.bettercloud.vault.response.LogicalResponse
import java.nio.file.Files
import java.nio.file.attribute.PosixFilePermission
import java.nio.file.attribute.PosixFilePermissions
import org.elasticsearch.gradle.VersionProperties
apply plugin: 'distribution'
buildscript {
repositories {
dependencies {
classpath group: 'com.bettercloud', name: 'vault-java-driver', version:"1.1.0"
classpath 'com.amazonaws:aws-java-sdk-s3:1.10.33'
ext.version = VersionProperties.elasticsearch
// This project pulls a snapshot version of the ML cpp artifacts and sets that as the artifact
// for this project so it can be used with dependency substitution. We do not use gradle's
// handling of S3 as a maven repo due to the dynamically generated creds being slow to propagate,
// necessitating retries.
void checkJavaVersion() {
* The Elastic Secrets vault is served via HTTPS with a Let's Encrypt certificate. The root certificates that cross-signed the Let's
* Encrypt certificates were not trusted by the JDK until 8u101. Therefore, we enforce that the JDK is at least 8u101 here.
final String javaVersion = System.getProperty('java.version')
final String javaVendor = System.getProperty('java.vendor')
def matcher = javaVersion =~ /1\.8\.0(?:_(\d+))?/
boolean matches = matcher.matches()
assert matches
final int update
if ( == null) {
update = 0
} else {
update =
if (update < 101) {
throw new GradleException("JDK ${javaVendor} ${javaVersion} does not have necessary root certificates " +
"(, update your JDK to at least JDK 8u101+")
void setupVaultAuthMethod() {
String VAULT_BASE_URL = ''
String VAULT_ROLE_ID = "8e90dd88-5a8e-9c12-0da9-5439f293ff97"
// get an authentication token with vault
String homePath =['user.home']
File githubToken = file("${homePath}/.elastic/github.token")
String vaultAuthBody = null
URL vaultUrl = null
if (githubToken.exists()) {
try {
Set<PosixFilePermission> perms = Files.getPosixFilePermissions(githubToken.toPath())
if (perms.equals(PosixFilePermissions.fromString("rw-------")) == false) {
throw new GradleException('github.token must have 600 permissions')
} catch (UnsupportedOperationException e) {
// Assume this isn't a POSIX file system
vaultUrl = new URL(VAULT_BASE_URL + '/v1/auth/github/login')
vaultAuthBody = "{\"token\": \"${githubToken.getText('UTF-8').trim()}\"}"
} else if (VAULT_SECRET_ID != null) {
vaultUrl = new URL(VAULT_BASE_URL + '/v1/auth/approle/login')
vaultAuthBody = "{\"role_id\": \"${VAULT_ROLE_ID}\", \"secret_id\": \"${VAULT_SECRET_ID}\"}"
} else {
throw new GradleException('Missing ~/.elastic/github.token file or VAULT_SECRET_ID environment variable, needed to authenticate with vault for secrets')
project.ext.vaultAuthBody = vaultAuthBody
project.ext.vaultUrl = vaultUrl
S3Object getZip() {
HttpURLConnection vaultConn = (HttpURLConnection) vaultUrl.openConnection()
vaultConn.setRequestProperty('Content-Type', 'application/json')
vaultConn.outputStream.withWriter('UTF-8') { writer ->
Object authResponse = new groovy.json.JsonSlurper().parseText(vaultConn.content.text)
VaultConfig config = new VaultConfig('', authResponse.auth.client_token)
Vault vault = new Vault(config)
LogicalResponse secret = vault.logical().read("aws-dev/creds/prelertartifacts")
final AWSCredentials creds = new BasicAWSCredentials('access_key'),'secret_key'))
// the keys may take a while to propagate, so wait up to 60 seconds retrying
final AmazonS3Client client = new AmazonS3Client(creds)
final String key = "maven/org/elasticsearch/ml/ml-cpp/${version}/ml-cpp-${version}.zip"
int retries = 120
while (retries > 0) {
try {
return client.getObject('prelert-artifacts', key)
} catch (AmazonServiceException e) {
if (e.getStatusCode() != 403) {
throw new GradleException('Error while trying to get ml-cpp snapshot', e)
throw new GradleException('Could not access ml-cpp artifacts. Timed out after 60 seconds')
File snapshotZip = new File(buildDir, "download/ml-cpp-${version}.zip")
task downloadMachineLearningSnapshot {
onlyIf {
// skip for offline builds, just rely on the artifact already having been downloaded before here
project.gradle.startParameter.isOffline() == false
doFirst {
S3Object zip = getZip()
// TODO: skip if modification of s3 key is before last write to local zip file?
InputStream zipStream = zip.getObjectContent()
try {
Files.copy(zipStream, snapshotZip.toPath())
} finally {
gradle.taskGraph.whenReady { taskGraph ->
// do validation of token/java version up front, don't wait for the task to run
if (taskGraph.hasTask(downloadMachineLearningSnapshot)) {
artifacts {
'default' file: snapshotZip, name: 'ml-cpp', type: 'zip', builtBy: downloadMachineLearningSnapshot