2020-01-27 12:05:34 -05:00
|
|
|
/*
|
|
|
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
|
|
|
* contributor license agreements. See the NOTICE file distributed with
|
|
|
|
* this work for additional information regarding copyright ownership.
|
|
|
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
|
|
|
* (the "License"); you may not use this file except in compliance with
|
|
|
|
* the License. You may obtain a copy of the License at
|
|
|
|
*
|
|
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
*
|
|
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
* See the License for the specific language governing permissions and
|
|
|
|
* limitations under the License.
|
|
|
|
*/
|
|
|
|
|
2020-01-15 03:55:41 -05:00
|
|
|
import groovy.xml.NamespaceBuilder
|
|
|
|
|
2021-06-11 03:26:34 -04:00
|
|
|
// Declare the configuration that carries Apache RAT (and its transitive dependencies).
// The custom RatTask resolves it from the root project to build the ant taskdef classpath.
configure(rootProject) {
  configurations.maybeCreate("ratDeps")

  dependencies.add("ratDeps", "org.apache.rat:apache-rat:${scriptDepVersions['apache-rat']}")
}
|
|
|
|
|
2021-06-11 03:26:34 -04:00
|
|
|
// Add a "rat" verification task to every project and describe the set of files it scans.
allprojects {
  task("rat", type: RatTask) {
    group = 'Verification'
    description = 'Runs Apache Rat checks.'

    def scanTree = project.fileTree(projectDir, {
      // Never look inside the project's build folder or at generated sources.
      exclude project.buildDir.name
      exclude "src/generated"

      // Subprojects are skipped here because each of them has its own rat task.
      exclude subprojects.collect { it.projectDir.name }

      // At the module level only a fixed set of file patterns is checked: people tend
      // to have various .gitignore-d resources (IDE files, etc.) lying around.
      include "**/*.gradle",
          "**/*.xml",
          "**/*.md",
          "**/*.py",
          "**/*.sh",
          "**/*.bat"

      // Everything under 'src' is scanned, minus a few known binary/data extensions.
      // This could be expressed relative to source sets, but all source sets live
      // under 'src' anyway so there is little value in doing so.
      include "src/**"
      exclude "src/**/*.png",
          "src/**/*.txt",
          "src/**/*.zip",
          "src/**/*.properties",
          "src/**/*.utf8"

      // Module-specific patterns are applied here, keyed by project path, rather than
      // by reconfiguring the task in each project: the provider can be made lazy and
      // everything stays in one place.
      switch (project.path) {
        case ":":
          include "gradlew", "gradlew.bat"

          // Tool/IDE/VCS folders, including github templates and workflows.
          exclude ".gradle", ".idea", ".muse", ".git", ".github"

          // The root project also covers the bootstrap (buildSrc) and composite
          // projects, so their sources are added to the scan.
          include "buildSrc/src/**", "dev-tools/missing-doclet/src/**"

          // Keep RAT away from the python venv: it gets lost and confused in there.
          exclude "dev-tools/aws-jmh/build/**"
          break

        case ":lucene:analysis:morfologik":
          exclude "src/**/*.info", "src/**/*.input"
          break

        case ":lucene:analysis:opennlp":
          exclude "src/**/en-test-lemmas.dict"
          break

        case ":lucene:demo":
          exclude "src/**/knn-token-vectors"
          break

        case ":lucene:test-framework":
          exclude "src/**/europarl.lines.txt.seek"
          break

        // Both analysis modules share the same set of excluded resources.
        case [":lucene:analysis:common", ":lucene:analysis.tests"]:
          exclude "src/**/*.aff",
              "src/**/*.dic",
              "src/**/*.good",
              "src/**/*.sug",
              "src/**/*.wrong",
              "src/**/charfilter/*.htm*",
              "src/**/*LuceneResourcesWikiPage.html",
              "src/**/*.rslp"
          break

        case ":lucene:benchmark":
          exclude "data/"
          break
      }
    })

    inputFileTrees.add(scanTree)
  }
}
|
2020-01-15 03:55:41 -05:00
|
|
|
|
2021-06-11 03:26:34 -04:00
|
|
|
/**
 * An Apache RAT adapter that validates whether files contain acceptable licenses.
 *
 * <p>The task passes the configured file trees to RAT's ant {@code report} task, which
 * writes an XML report. A plain-text list of the scanned files is written next to the
 * report for debugging. The report is then parsed and a {@link GradleException} is
 * thrown if any resource is marked as not having an approved license.
 */
@CacheableTask
class RatTask extends DefaultTask {

  /** The file trees whose files are handed over to RAT for scanning. */
  @InputFiles
  @PathSensitive(PathSensitivity.RELATIVE)
  @IgnoreEmptyDirectories
  final ListProperty<ConfigurableFileTree> inputFileTrees = project.objects.listProperty(ConfigurableFileTree)

  /** Location of RAT's XML report; defaults to {@code rat/rat-report.xml} under the build directory. */
  @OutputFile
  final RegularFileProperty xmlReport = project.objects.fileProperty().convention(
      project.layout.buildDirectory.file("rat/rat-report.xml"))

  /**
   * Runs RAT (through its ant task) over {@link #inputFileTrees} and writes the XML report.
   *
   * <p>Also writes the sorted list of scanned files to a sibling file whose name is the
   * report's name with a trailing {@code .xml} replaced by {@code -filelist.txt}.
   *
   * @param reportFile the file the XML report is written to.
   */
  def generateReport(File reportFile) {
    // Set up ant rat task.
    def ratClasspath = project.rootProject.configurations.ratDeps.asPath
    ant.setLifecycleLogLevel(AntBuilder.AntMessagePriority.ERROR)
    ant.taskdef(resource: 'org/apache/rat/anttasks/antlib.xml', classpath: ratClasspath)

    // Collect all input files for debugging.
    String inputFileList = inputFileTrees.get().collectMany { fileTree ->
      fileTree.asList()
    }.sort().join("\n")

    // Strip a literal ".xml" suffix (the dot must be escaped, otherwise it matches any
    // character) and always append the file list suffix, so that the file list can never
    // end up at the same path as the report itself.
    String fileListPath = reportFile.path.replaceAll('\\.xml$', '') + '-filelist.txt'
    project.file(fileListPath).setText(inputFileList, "UTF-8")

    // Run rat via ant.
    ant.report(format: 'xml', reportFile: reportFile, addDefaultLicenseMatchers: true) {
      // Pass all gradle file trees to the ant task (Gradle's internal adapters are used).
      inputFileTrees.get().each { fileTree ->
        fileTree.addToAntBuilder(ant, 'resources', FileCollection.AntType.ResourceCollection)
      }

      // BSD 4-clause stuff (is disallowed below)
      substringMatcher(licenseFamilyCategory: "BSD4 ", licenseFamilyName: "Original BSD License (with advertising clause)") {
        pattern(substring: "All advertising materials")
      }

      // BSD-like stuff
      substringMatcher(licenseFamilyCategory: "BSD ", licenseFamilyName: "Modified BSD License") {
        // brics automaton
        pattern(substring: "Copyright (c) 2001-2009 Anders Moeller")
        // snowball
        pattern(substring: "Copyright (c) 2001, Dr Martin Porter")
        // UMASS kstem
        pattern(substring: "THIS SOFTWARE IS PROVIDED BY UNIVERSITY OF MASSACHUSETTS AND OTHER CONTRIBUTORS")
        // Egothor
        pattern(substring: "Egothor Software License version 1.00")
        // JaSpell
        pattern(substring: "Copyright (c) 2005 Bruno Martins")
        // d3.js
        pattern(substring: "THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS")
        // highlight.js
        pattern(substring: "THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS")
      }

      // MIT-like
      // NOTE(review): the family name here is "Modified BSD License" although the category
      // is MIT; both family names are approved below. Confirm whether this is intentional.
      substringMatcher(licenseFamilyCategory: "MIT ", licenseFamilyName:"Modified BSD License") {
        // ICU license
        pattern(substring: "Permission is hereby granted, free of charge, to any person obtaining a copy")
        // ui-grid
        pattern(substring: " ; License: MIT")
      }

      // Apache
      substringMatcher(licenseFamilyCategory: "AL ", licenseFamilyName: "Apache") {
        pattern(substring: "Licensed to the Apache Software Foundation (ASF) under")
        // this is the old - school one under some files
        pattern(substring: 'Licensed under the Apache License, Version 2.0 (the "License")')
      }

      substringMatcher(licenseFamilyCategory: "GEN ", licenseFamilyName: "Generated") {
        // svg files generated by gnuplot
        pattern(substring: "Produced by GNUPLOT")
        // snowball stemmers generated by snowball compiler
        pattern(substring: "Generated by Snowball")
        // parsers generated by antlr
        pattern(substring: "ANTLR GENERATED CODE")
      }

      // Only these license families are accepted; the BSD 4-clause family declared
      // above is deliberately not listed.
      approvedLicense(familyName: "Apache")
      approvedLicense(familyName: "The MIT License")
      approvedLicense(familyName: "Modified BSD License")
      approvedLicense(familyName: "Generated")
    }
  }

  /**
   * Parses the XML report and fails if it lists any resource without an approved license.
   *
   * @param reportFile the XML report previously written by {@link #generateReport(File)}.
   * @throws GradleException listing every resource whose {@code license-approval}
   *     element has {@code name="false"}.
   */
  def printUnknownFiles(File reportFile) {
    def ratXml = new XmlParser().parse(reportFile)
    def errors = []
    ratXml.resource.each { resource ->
      if (resource.'license-approval'.@name[0] == "false") {
        errors << "Unknown license: ${resource.@name}"
      }
    }
    if (errors) {
      throw new GradleException("Found " + errors.size() + " file(s) with errors:\n" +
          errors.collect{ msg -> " - ${msg}" }.join("\n"))
    }
  }

  /**
   * Task action: generates the report, then checks it for unapproved resources.
   *
   * <p>The {@code file.encoding} system property is recorded up front and compared
   * afterwards; if it changed while RAT was running, the task fails.
   */
  @TaskAction
  def execute() {
    def origEncoding = System.getProperty("file.encoding")
    try {
      File reportFile = xmlReport.get().asFile
      generateReport(reportFile)
      printUnknownFiles(reportFile)
    } finally {
      // Sanity check: running RAT must not alter the JVM-wide default encoding property.
      if (System.getProperty("file.encoding") != origEncoding) {
        throw new GradleException("Something is wrong: Apache RAT changed file.encoding to ${System.getProperty('file.encoding')}?")
      }
    }
  }
}
|