Fix unknown licenses (#31223)

The goal of this commit is to address unknown licenses when producing
the dependencies info report. We have two different checks that we run
on licenses. The first check is whether or not we have stashed a copy of
the license text for a dependency in the repository. The second is to
map every dependency to a license type (e.g., BSD 3-clause). The problem
here is that the way we were handling licenses in the second check
differs from how we handle licenses in the first check. The first check
works by finding a license file with the name of the artifact followed
by the text -LICENSE.txt. Yet in some cases we allow mapping an artifact
name to another name used to check for the license (e.g., we map
lucene-.* to lucene, and opensaml-.* to shibboleth). The second check
understood the first way of looking for a license file but not the
second way. So in this commit we teach the second check about the
mappings from artifact names to license names. We do this by copying the
configuration from the dependencyLicenses task to the dependenciesInfo
task and then reusing the code from the first check in the second
check. There were some other challenges here though. For example,
dependenciesInfo was checking too many dependencies. For now, we should
only be checking direct dependencies and leaving transitive dependencies
from another org.elasticsearch artifact to that artifact (we want to do
this differently in a follow-up). We also want to disable
dependenciesInfo for projects that we do not publish; users only care
about licenses they might be exposed to if they use our assembled
products. With all of the changes in this commit we have eliminated all
unknown licenses. A follow-up will enforce that when we add a new
dependency it does not get mapped to unknown; these will be forbidden in
the future. Therefore, with this change and earlier changes we are left
having no unknown licenses and two custom licenses; custom here means it
does not map to an SPDX license type. Those two licenses are xz and
ldapsdk. A future change will not allow additional custom licenses
unless they are explicitly whitelisted. This ensures that if a new
dependency is added it is mapped to an SPDX license or mapped to custom
because it does not have an SPDX license.
This commit is contained in:
Jason Tedor 2018-06-09 07:28:41 -04:00 committed by GitHub
parent 3db1fe7afe
commit 65c107b47d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
10 changed files with 87 additions and 24 deletions

View File

@ -543,7 +543,7 @@ subprojects { project ->
}
}
/* Remove assemble on all qa projects because we don't need to publish
/* Remove assemble/dependenciesInfo on all qa projects because we don't need to publish
* artifacts for them. */
gradle.projectsEvaluated {
subprojects {
@ -553,6 +553,11 @@ gradle.projectsEvaluated {
project.tasks.remove(assemble)
project.build.dependsOn.remove('assemble')
}
Task dependenciesInfo = project.tasks.findByName('dependenciesInfo')
if (dependenciesInfo) {
project.tasks.remove(dependenciesInfo)
project.precommit.dependsOn.remove('dependenciesInfo')
}
}
}
}

View File

@ -762,6 +762,10 @@ class BuildPlugin implements Plugin<Project> {
/**
 * Registers the "dependenciesInfo" task (a DependenciesInfoTask) on the given project and
 * wires its inputs from the project's configurations.
 *
 * @param project the project on which the task is created
 */
private static configureDependenciesInfo(Project project) {
Task deps = project.tasks.create("dependenciesInfo", DependenciesInfoTask.class)
// NOTE(review): this listing is a rendered diff; the assignment below from the compile
// configuration appears to be the pre-change line shown next to its replacements -- confirm.
deps.dependencies = project.configurations.compile.allDependencies
// the task reports on the runtime configuration and subtracts what is in compileOnly
deps.runtimeConfiguration = project.configurations.runtime
deps.compileOnlyConfiguration = project.configurations.compileOnly
// the mappings are copied from the dependencyLicenses task only after the project has been
// evaluated; presumably because build scripts add their mappings during evaluation -- confirm
project.afterEvaluate {
deps.mappings = project.dependencyLicenses.mappings
}
}
}

View File

@ -19,14 +19,19 @@
package org.elasticsearch.gradle
import org.elasticsearch.gradle.precommit.DependencyLicensesTask
import org.gradle.api.DefaultTask
import org.gradle.api.artifacts.Configuration
import org.gradle.api.artifacts.Dependency
import org.gradle.api.artifacts.DependencyResolutionListener
import org.gradle.api.artifacts.DependencySet
import org.gradle.api.tasks.Input
import org.gradle.api.tasks.InputDirectory
import org.gradle.api.tasks.OutputFile
import org.gradle.api.tasks.TaskAction
import java.util.regex.Matcher
import java.util.regex.Pattern
/**
* A task to gather information about the dependencies and export them into a csv file.
@ -44,7 +49,14 @@ public class DependenciesInfoTask extends DefaultTask {
/** Dependencies to gather information from. */
@Input
public DependencySet dependencies
public Configuration runtimeConfiguration
/** We subtract compile-only dependencies. */
@Input
public Configuration compileOnlyConfiguration
@Input
public LinkedHashMap<String, String> mappings
/** Directory to read license files */
@InputDirectory
@ -59,15 +71,34 @@ public class DependenciesInfoTask extends DefaultTask {
/**
 * Task action that builds the dependencies report and writes it to {@code outputFile} as UTF-8.
 * Each reported dependency becomes one line of the form
 * {@code group:name,version,url,licenseType}.
 */
@TaskAction
public void generateDependenciesInfo() {
// only the dependencies reachable through getAllDependencies() of the runtime configuration are considered
final DependencySet runtimeDependencies = runtimeConfiguration.getAllDependencies()
// we have to resolve the transitive dependencies and create a group:artifactId:version map
// (the key format here must stay in sync with the string built for the contains() check below)
final Set<String> compileOnlyArtifacts =
compileOnlyConfiguration
.getResolvedConfiguration()
.resolvedArtifacts
.collect { it -> "${it.moduleVersion.id.group}:${it.moduleVersion.id.name}:${it.moduleVersion.id.version}" }
final StringBuilder output = new StringBuilder()
// NOTE(review): this listing is a rendered diff; the loop over `dependencies` that follows looks
// like the pre-change implementation shown next to its replacement (the loop over
// `runtimeDependencies`) -- confirm before treating both loops as live code.
for (Dependency dependency : dependencies) {
// Only external dependencies are checked
if (dependency.group != null && dependency.group.contains("elasticsearch") == false) {
final String url = createURL(dependency.group, dependency.name, dependency.version)
final String licenseType = getLicenseType(dependency.group, dependency.name)
output.append("${dependency.group}:${dependency.name},${dependency.version},${url},${licenseType}\n")
for (final Dependency dependency : runtimeDependencies) {
// we do not need compile-only dependencies here
if (compileOnlyArtifacts.contains("${dependency.group}:${dependency.name}:${dependency.version}")) {
continue
}
// only external dependencies are checked
// (a dependency whose group is null is not skipped by this test and is still reported)
if (dependency.group != null && dependency.group.contains("org.elasticsearch")) {
continue
}
final String url = createURL(dependency.group, dependency.name, dependency.version)
// translate the artifact name through the configured mappings before looking up the license type
final String dependencyName = DependencyLicensesTask.getDependencyName(mappings, dependency.name)
logger.info("mapped dependency ${dependency.group}:${dependency.name} to ${dependencyName} for license info")
final String licenseType = getLicenseType(dependency.group, dependencyName)
// the row keeps the original artifact name; only the license lookup uses the mapped name
output.append("${dependency.group}:${dependency.name},${dependency.version},${url},${licenseType}\n")
}
outputFile.setText(output.toString(), 'UTF-8')
}

View File

@ -109,6 +109,10 @@ public class DependencyLicensesTask extends DefaultTask {
mappings.put(from, to)
}
/**
 * Returns a copy of the configured mappings, in insertion order, so that callers cannot
 * modify the map held by this task.
 */
public LinkedHashMap<String, String> getMappings() {
    final LinkedHashMap<String, String> copy = new LinkedHashMap<>()
    copy.putAll(mappings)
    return copy
}
/**
* Add a rule which will skip SHA checking for the given dependency name. This should be used for
* locally build dependencies, which cause the sha to change constantly.
@ -129,10 +133,6 @@ public class DependencyLicensesTask extends DefaultTask {
throw new GradleException("Licences dir ${licensesDir} does not exist, but there are dependencies")
}
// order is the same for keys and values iteration since we use a linked hashmap
List<String> mapped = new ArrayList<>(mappings.values())
Pattern mappingsPattern = Pattern.compile('(' + mappings.keySet().join(')|(') + ')')
Map<String, Integer> licenses = new HashMap<>()
Map<String, Integer> notices = new HashMap<>()
Set<File> shaFiles = new HashSet<File>()
@ -162,16 +162,10 @@ public class DependencyLicensesTask extends DefaultTask {
checkSha(dependency, jarName, shaFiles)
}
logger.info("Checking license/notice for " + depName)
Matcher match = mappingsPattern.matcher(depName)
if (match.matches()) {
int i = 0
while (i < match.groupCount() && match.group(i + 1) == null) ++i;
logger.info("Mapped dependency name ${depName} to ${mapped.get(i)} for license check")
depName = mapped.get(i)
}
checkFile(depName, jarName, licenses, 'LICENSE')
checkFile(depName, jarName, notices, 'NOTICE')
final String dependencyName = getDependencyName(mappings, depName)
logger.info("mapped dependency name ${depName} to ${dependencyName} for license/notice check")
checkFile(dependencyName, jarName, licenses, 'LICENSE')
checkFile(dependencyName, jarName, notices, 'NOTICE')
}
licenses.each { license, count ->
@ -189,6 +183,19 @@ public class DependencyLicensesTask extends DefaultTask {
}
}
/**
 * Maps a dependency name to the name used when looking up its license information.
 *
 * The keys of {@code mappings} are regular expressions and are tried in insertion order; the
 * value of the first key that matches the entire dependency name is returned.
 *
 * @param mappings       regular expression to license name, in priority order
 * @param dependencyName the artifact name to translate
 * @return the mapped name, or {@code dependencyName} unchanged when no key matches
 */
public static String getDependencyName(final LinkedHashMap<String, String> mappings, final String dependencyName) {
    // Each key is matched on its own instead of joining all keys into one alternation and
    // scanning capture groups by index: a key that contains its own capturing group would shift
    // the group numbering, and an empty map would produce the pattern "()" which matches the
    // empty name and then indexes into an empty list.
    for (final Map.Entry<String, String> mapping : mappings.entrySet()) {
        if (Pattern.matches(mapping.getKey(), dependencyName)) {
            return mapping.getValue()
        }
    }
    return dependencyName
}
/** Locates the checksum file that belongs to the given jar inside the licenses directory. */
private File getShaFile(String jarName) {
    final String shaFileName = jarName + SHA_EXTENSION
    return new File(licensesDir, shaFileName)
}

View File

@ -31,6 +31,9 @@ esplugin {
tasks.remove(assemble)
build.dependsOn.remove('assemble')
dependencyLicenses.enabled = false
dependenciesInfo.enabled = false
compileJava.options.compilerArgs << "-Xlint:-cast,-deprecation,-rawtypes,-try,-unchecked"
// no unit tests

View File

@ -32,6 +32,11 @@ Collection distributions = project('archives').subprojects + project('packages')
// Concatenates the dependencies CSV files into a single file
task generateDependenciesReport(type: ConcatFilesTask) {
project.rootProject.allprojects {
afterEvaluate {
if (it.tasks.findByName("dependenciesInfo")) dependsOn it.tasks.dependenciesInfo
}
}
files = fileTree(dir: project.rootDir, include: '**/dependencies.csv' )
headerLine = "name,version,url,license"
target = new File(System.getProperty('csv')?: "${project.buildDir}/dependencies/es-dependencies.csv")

View File

@ -22,3 +22,5 @@ test.enabled = false
// Not published so no need to assemble
tasks.remove(assemble)
build.dependsOn.remove('assemble')
dependenciesInfo.enabled = false

View File

@ -28,5 +28,9 @@ gradle.projectsEvaluated {
project.tasks.remove(assemble)
project.build.dependsOn.remove('assemble')
}
Task dependenciesInfo = project.tasks.findByName('dependenciesInfo')
if (dependenciesInfo) {
project.precommit.dependsOn.remove('dependenciesInfo')
}
}
}

View File

@ -22,6 +22,7 @@ dependencies {
test.enabled = false
dependencyLicenses.enabled = false
dependenciesInfo.enabled = false
// the main files are actually test files, so use the appropriate forbidden api sigs
forbiddenApisMain {

View File

@ -10,6 +10,7 @@ dependencies {
forbiddenApisMain.enabled = true
dependencyLicenses.enabled = false
dependenciesInfo.enabled = false
jarHell.enabled = false