2020-01-27 12:05:34 -05:00
|
|
|
/*
|
|
|
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
|
|
|
* contributor license agreements. See the NOTICE file distributed with
|
|
|
|
* this work for additional information regarding copyright ownership.
|
|
|
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
|
|
|
* (the "License"); you may not use this file except in compliance with
|
|
|
|
* the License. You may obtain a copy of the License at
|
|
|
|
*
|
|
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
*
|
|
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
* See the License for the specific language governing permissions and
|
|
|
|
* limitations under the License.
|
|
|
|
*/
|
|
|
|
|
2020-01-15 03:55:41 -05:00
|
|
|
import groovy.xml.NamespaceBuilder
|
|
|
|
|
|
|
|
// The RAT ant tasks are resolved through a dedicated "ratDeps" configuration
// declared once on the root project; RatTask reads its classpath from there.
configure(rootProject) {
  configurations.maybeCreate("ratDeps")

  dependencies.add("ratDeps", "org.apache.rat:apache-rat:${scriptDepVersions['apache-rat']}")
}
|
|
|
|
|
|
|
|
// Register a "rat" verification task (implemented by RatTask) on every project.
allprojects {
  task rat(type: RatTask) {
    group = 'Verification'
    description = 'Runs Apache Rat checks.'
  }
}
|
2020-01-15 03:55:41 -05:00
|
|
|
|
2020-01-15 04:07:10 -05:00
|
|
|
// Root project: widen the default include patterns to cover build logic and
// shared tooling directories, and exclude one file whose header is undecided.
rootProject.rat {
  includes = includes + [
    "buildSrc/**/*.java",
    "gradle/**/*.gradle",
    "lucene/tools/forbiddenApis/**",
    "lucene/tools/prettify/**",
  ]
  excludes = excludes + [
    // Unclear if this needs ASF header, depends on how much was copied from ElasticSearch
    "**/ErrorReportingTestListener.java"
  ]
}
|
2020-01-15 03:55:41 -05:00
|
|
|
|
2020-01-15 04:07:10 -05:00
|
|
|
// Analysis-common: skip dictionary/test-data style files and HTML fixtures
// found under the Java source directories.
project(":lucene:analysis:common") {
  rat {
    srcExcludes = srcExcludes + [
      "**/*.aff",
      "**/*.dic",
      "**/*.wrong",
      "**/*.good",
      "**/*.sug",
      "**/charfilter/*.htm*",
      "**/*LuceneResourcesWikiPage.html"
    ]
  }
}
|
2020-01-15 03:55:41 -05:00
|
|
|
|
2020-01-15 04:07:10 -05:00
|
|
|
// Kuromoji: one text fixture is excluded explicitly from the source scan.
project(":lucene:analysis:kuromoji") {
  rat {
    srcExcludes = srcExcludes + [
      // whether rat detects this as binary or not is platform dependent?!
      "**/bocchan.utf-8"
    ]
  }
}
|
2020-01-15 03:55:41 -05:00
|
|
|
|
2020-01-15 04:07:10 -05:00
|
|
|
// OpenNLP: the default includes cover src/tools/**, so the plain-text model
// training data underneath it has to be excluded from the project-level scan.
project(":lucene:analysis:opennlp") {
  rat {
    excludes = excludes + [
      "src/tools/test-model-data/*.txt",
    ]
  }
}
|
2020-01-15 03:55:41 -05:00
|
|
|
|
2020-01-15 04:07:10 -05:00
|
|
|
// Highlighter: exclude a data file from the source-directory scan.
project(":lucene:highlighter") {
  rat {
    srcExcludes = srcExcludes + [
      "**/CambridgeMA.utf8"
    ]
  }
}
|
2020-01-15 03:55:41 -05:00
|
|
|
|
2020-01-15 04:07:10 -05:00
|
|
|
// Suggest: exclude data files from the source-directory scan.
project(":lucene:suggest") {
  rat {
    srcExcludes = srcExcludes + [
      "**/Top50KWiki.utf8",
      "**/stop-snowball.txt"
    ]
  }
}
|
2020-01-15 03:55:41 -05:00
|
|
|
|
2020-01-15 04:07:10 -05:00
|
|
|
// Solr core: exclude an HTML test fixture from the source-directory scan.
project(":solr:core") {
  rat {
    srcExcludes = srcExcludes + [
      "**/htmlStripReaderTest.html"
    ]
  }
}
|
2020-01-15 03:55:41 -05:00
|
|
|
|
2020-01-15 04:07:10 -05:00
|
|
|
// Solr webapp: scan everything in the project directory (replacing the default
// include patterns), minus images, IDE files, the build script and build output.
project(":solr:webapp") {
  rat {
    includes = [ "**" ]
    excludes = excludes + [
      "web/img/**",
      "*.iml",
      "build.gradle",
      "build/**",
    ]
  }
}
|
|
|
|
|
|
|
|
// Structure inspired by existing task from Apache Kafka, heavily modified since then.
|
|
|
|
/**
 * Runs the Apache RAT ant task over a project's files and fails the build if
 * any scanned file is reported without an approved license.
 *
 * Three sets of files are scanned:
 *  - files under the project directory selected by {@link #includes} / {@link #excludes};
 *  - when the Java plugin is applied, everything in the main and test Java
 *    source directories except {@link #srcExcludes};
 *  - when the Java plugin is applied, {@code META-INF/**} under the main
 *    resource directories.
 */
class RatTask extends DefaultTask {
  /** Ant include patterns, relative to the project directory. */
  @Input
  List<String> includes = [
    "*.gradle",
    "*.xml",
    "src/tools/**"
  ]

  /** Ant exclude patterns, relative to the project directory. */
  @Input
  List<String> excludes = []

  /** Ant exclude patterns applied inside each Java source directory. */
  @Input
  List<String> srcExcludes = [
    "**/TODO",
    "**/*.txt",
    "**/*.md",
    "**/*.iml",
    "build/**"
  ]

  /** Location of the XML report written by RAT and parsed afterwards. */
  @OutputFile
  def xmlReport = new File(new File(project.buildDir, 'rat'), 'rat-report.xml')

  /**
   * The files RAT will scan, declared as task inputs.
   *
   * Without this, only the pattern lists are tracked and Gradle considers the
   * task up-to-date after one successful run even if scanned files are added
   * or modified, so new license violations would be missed.
   *
   * The trees built here must mirror the ant filesets in
   * {@link #generateXmlReport()}.
   *
   * @return a file collection covering every file the RAT report will visit
   */
  @InputFiles
  FileCollection getScannedFiles() {
    def trees = [
      project.fileTree(dir: project.projectDir, includes: includes, excludes: excludes)
    ]

    if (project.plugins.findPlugin(JavaPlugin)) {
      [
        project.sourceSets.main.java.srcDirs,
        project.sourceSets.test.java.srcDirs,
      ].flatten().each { srcLocation ->
        trees << project.fileTree(dir: srcLocation, excludes: srcExcludes)
      }

      [
        project.sourceSets.main.resources.srcDirs
      ].flatten().each { srcLocation ->
        trees << project.fileTree(dir: srcLocation, includes: ["META-INF/**"])
      }
    }

    return project.files(trees)
  }

  /**
   * Loads the RAT ant tasks from the root project's {@code ratDeps}
   * configuration and writes an XML report to {@link #xmlReport}.
   */
  def generateXmlReport() {
    def uri = 'antlib:org.apache.rat.anttasks'
    def ratClasspath = project.rootProject.configurations.ratDeps.asPath
    ant.taskdef(resource: 'org/apache/rat/anttasks/antlib.xml', uri: uri, classpath: ratClasspath)

    def rat = NamespaceBuilder.newInstance(ant, uri)
    rat.report(format: 'xml', reportFile: xmlReport, addDefaultLicenseMatchers: true) {
      // Project-level files selected by the include/exclude patterns.
      ant.fileset(dir: "${project.projectDir}") {
        includes.each { pattern -> ant.include(name: pattern) }
        excludes.each { pattern -> ant.exclude(name: pattern) }
      }

      if (project.plugins.findPlugin(JavaPlugin)) {
        // Java sources: everything except srcExcludes; missing directories are tolerated.
        [
          project.sourceSets.main.java.srcDirs,
          project.sourceSets.test.java.srcDirs,
        ].flatten().each { srcLocation ->
          ant.fileset(dir: srcLocation, erroronmissingdir: false) {
            srcExcludes.each { pattern -> ant.exclude(name: pattern) }
          }
        }

        // Main resources: only META-INF content is checked.
        [
          project.sourceSets.main.resources.srcDirs
        ].flatten().each { srcLocation ->
          ant.fileset(dir: srcLocation, erroronmissingdir: false) {
            ant.include(name: "META-INF/**")
          }
        }
      }

      // The license rules below were manually copied from lucene/common-build.xml, there is currently no mechanism to sync them

      // BSD 4-clause stuff (is disallowed below)
      substringMatcher(licenseFamilyCategory: "BSD4 ", licenseFamilyName: "Original BSD License (with advertising clause)") {
        pattern(substring: "All advertising materials")
      }

      // BSD-like stuff
      substringMatcher(licenseFamilyCategory: "BSD ", licenseFamilyName: "Modified BSD License") {
        // brics automaton
        pattern(substring: "Copyright (c) 2001-2009 Anders Moeller")
        // snowball
        pattern(substring: "Copyright (c) 2001, Dr Martin Porter")
        // UMASS kstem
        pattern(substring: "THIS SOFTWARE IS PROVIDED BY UNIVERSITY OF MASSACHUSETTS AND OTHER CONTRIBUTORS")
        // Egothor
        pattern(substring: "Egothor Software License version 1.00")
        // JaSpell
        pattern(substring: "Copyright (c) 2005 Bruno Martins")
        // d3.js
        pattern(substring: "THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS")
        // highlight.js
        pattern(substring: "THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS")
      }

      // MIT-like
      // Reported under "The MIT License" (approved below) rather than being
      // mislabelled as "Modified BSD License"; the pass/fail outcome is the same.
      substringMatcher(licenseFamilyCategory: "MIT ", licenseFamilyName: "The MIT License") {
        // ICU license
        pattern(substring: "Permission is hereby granted, free of charge, to any person obtaining a copy")
      }

      // Apache
      substringMatcher(licenseFamilyCategory: "AL ", licenseFamilyName: "Apache") {
        pattern(substring: "Licensed to the Apache Software Foundation (ASF) under")
        // this is the old - school one under some files
        pattern(substring: 'Licensed under the Apache License, Version 2.0 (the "License")')
      }

      substringMatcher(licenseFamilyCategory: "GEN ", licenseFamilyName: "Generated") {
        // svg files generated by gnuplot
        pattern(substring: "Produced by GNUPLOT")
        // snowball stemmers generated by snowball compiler
        pattern(substring: "Generated by Snowball")
        // parsers generated by antlr
        pattern(substring: "ANTLR GENERATED CODE")
      }

      // Only these families pass; the BSD 4-clause family above is deliberately absent.
      approvedLicense(familyName: "Apache")
      approvedLicense(familyName: "The MIT License")
      approvedLicense(familyName: "Modified BSD License")
      approvedLicense(familyName: "Generated")
    }
  }

  /**
   * Parses the XML report and fails if any resource is marked as not approved.
   *
   * @throws GradleException listing every resource whose license-approval is "false"
   */
  def printUnknownFiles() {
    def ratXml = new XmlParser().parse(xmlReport)
    def errors = []
    ratXml.resource.each { resource ->
      if (resource.'license-approval'.@name[0] == "false") {
        errors << "Unknown license: ${resource.@name}"
      }
    }
    if (errors) {
      throw new GradleException("Found " + errors.size() + " file(s) with errors:\n" +
          errors.collect{ msg -> " - ${msg}" }.join("\n"))
    }
  }

  /**
   * Task action: generate the report, then fail on unapproved files.
   *
   * Also verifies that the {@code file.encoding} system property is the same
   * after the run as before it, and fails the build if it was changed.
   */
  @TaskAction
  def rat() {
    def origEncoding = System.getProperty("file.encoding")
    try {
      generateXmlReport()
      printUnknownFiles()
    } finally {
      if (System.getProperty("file.encoding") != origEncoding) {
        throw new GradleException("Insane: rat changed file.encoding to ${System.getProperty('file.encoding')}?")
      }
    }
  }
}
|