LUCENE-9977: rat task corrections (proper up-to-date checks, cleanup and rewrite of the task itself). (#178)

This commit is contained in:
Dawid Weiss 2021-06-11 09:26:34 +02:00 committed by GitHub
parent 69ab1447a7
commit 3bedc0871e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
15 changed files with 361 additions and 138 deletions

View File

@ -1,3 +1,20 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
// Declare script dependency versions outside of palantir's // Declare script dependency versions outside of palantir's
// version unification control. These are not our main dependencies // version unification control. These are not our main dependencies
// but are reused in buildSrc and across applied scripts. // but are reused in buildSrc and across applied scripts.

View File

@ -1,3 +1,20 @@
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
# Developer Scripts # Developer Scripts
This folder contains various useful scripts for developers, mostly related to This folder contains various useful scripts for developers, mostly related to

View File

@ -1,3 +1,20 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os import os
import gzip import gzip
import time import time

View File

@ -17,6 +17,7 @@
import groovy.xml.NamespaceBuilder import groovy.xml.NamespaceBuilder
// Configure rat dependencies for use in the custom task.
configure(rootProject) { configure(rootProject) {
configurations { configurations {
ratDeps ratDeps
@ -27,139 +28,126 @@ configure(rootProject) {
} }
} }
// Configure the rat validation task and all scanned directories.
allprojects { allprojects {
task("rat", type: RatTask) { task("rat", type: RatTask) {
group = 'Verification' group = 'Verification'
description = 'Runs Apache Rat checks.' description = 'Runs Apache Rat checks.'
// Build the default file tree for this project's rat task: rooted at the
// project directory and narrowed by the include/exclude patterns below.
def defaultScanFileTree = project.fileTree(projectDir, {
// Don't check under the project's build folder.
exclude project.buildDir.name
// Exclude any generated stuff.
exclude "src/generated"
// Don't check any of the subprojects - they have their own rat tasks.
exclude subprojects.collect { it.projectDir.name }
// At the module scope we only check selected file patterns as folks have various .gitignore-d resources
// generated by IDEs, etc.
include "**/*.gradle"
include "**/*.xml"
include "**/*.md"
include "**/*.py"
include "**/*.sh"
include "**/*.bat"
// Include selected patterns from any source folders. We could make this
// relative to source sets but it seems to be of little value - all our source sets
// live under 'src' anyway.
include "src/**"
// Skip selected file types under 'src' (images, text files, archives, properties, .utf8 files).
exclude "src/**/*.png"
exclude "src/**/*.txt"
exclude "src/**/*.zip"
exclude "src/**/*.properties"
exclude "src/**/*.utf8"
// Conditionally apply module-specific patterns. We do it here instead
// of reconfiguring each project because the provider can be made lazy
// and it's easier to manage this way.
switch (project.path) {
// Root project: add the gradle wrapper scripts and skip tool/VCS folders.
case ":":
include "gradlew"
include "gradlew.bat"
exclude ".gradle"
exclude ".idea"
exclude ".muse"
exclude ".git"
// Exclude github stuff (templates, workflows).
exclude ".github"
// The root project also includes patterns for the bootstrap (buildSrc) and composite
// projects. Include their sources in the scan.
include "buildSrc/src/**"
include "dev-tools/missing-doclet/src/**"
break
// Module-specific exclusions for :lucene:analysis:morfologik.
case ":lucene:analysis:morfologik":
exclude "src/**/*.info"
exclude "src/**/*.input"
break
// Module-specific exclusions for :lucene:analysis:opennlp.
case ":lucene:analysis:opennlp":
exclude "src/**/en-test-lemmas.dict"
break
// Module-specific exclusions for :lucene:test-framework.
case ":lucene:test-framework":
exclude "src/**/europarl.lines.txt.seek"
break
// Module-specific exclusions for :lucene:analysis:common.
case ":lucene:analysis:common":
exclude "src/**/*.aff"
exclude "src/**/*.dic"
exclude "src/**/*.good"
exclude "src/**/*.sug"
exclude "src/**/*.wrong"
exclude "src/**/charfilter/*.htm*"
exclude "src/**/*LuceneResourcesWikiPage.html"
exclude "src/**/*.rslp"
break
// Module-specific exclusions for :lucene:benchmark (its top-level 'data' folder).
case ":lucene:benchmark":
exclude "data/"
break
}
})
inputFileTrees.add(defaultScanFileTree)
} }
} }
configure(rootProject) { /**
rat { * An Apache RAT adapter that validates whether files contain acceptable licenses.
includes += [ */
"buildSrc/**/*.java",
"gradle/**/*.gradle",
"lucene/tools/forbiddenApis/**",
"lucene/tools/prettify/**",
]
excludes += [
// Unclear if this needs ASF header, depends on how much was copied from ElasticSearch
"**/ErrorReportingTestListener.java"
]
}
}
configure(project(":lucene:analysis:common")) {
rat {
srcExcludes += [
"**/*.aff",
"**/*.dic",
"**/*.wrong",
"**/*.good",
"**/*.sug",
"**/charfilter/*.htm*",
"**/*LuceneResourcesWikiPage.html"
]
}
}
configure(project(":lucene:analysis:kuromoji")) {
rat {
srcExcludes += [
// whether rat detects this as binary or not is platform dependent?!
"**/bocchan.utf-8"
]
}
}
configure(project(":lucene:analysis:opennlp")) {
rat {
excludes += [
"src/tools/test-model-data/*.txt",
]
}
}
configure(project(":lucene:highlighter")) {
rat {
srcExcludes += [
"**/CambridgeMA.utf8"
]
}
}
configure(project(":lucene:suggest")) {
rat {
srcExcludes += [
"**/Top50KWiki.utf8",
"**/stop-snowball.txt"
]
}
}
// Structure inspired by existing task from Apache Kafka, heavily modified since then.
class RatTask extends DefaultTask { class RatTask extends DefaultTask {
@Input @InputFiles
List<String> includes = [ ListProperty<ConfigurableFileTree> inputFileTrees = project.objects.listProperty(ConfigurableFileTree)
"*.gradle",
"*.xml",
"src/tools/**"
]
@Input
List<String> excludes = []
@Input
List<String> srcExcludes = [
"**/TODO",
"**/*.txt",
"**/*.md",
"**/*.iml",
"build/**"
]
@OutputFile @OutputFile
def xmlReport = new File(new File(project.buildDir, 'rat'), 'rat-report.xml') RegularFileProperty xmlReport = project.objects.fileProperty().convention(
project.layout.buildDirectory.file("rat/rat-report.xml"))
def generateXmlReport() { def generateReport(File reportFile) {
// Set up ant rat task.
def uri = 'antlib:org.apache.rat.anttasks' def uri = 'antlib:org.apache.rat.anttasks'
def ratClasspath = project.rootProject.configurations.ratDeps.asPath def ratClasspath = project.rootProject.configurations.ratDeps.asPath
ant.taskdef(resource: 'org/apache/rat/anttasks/antlib.xml', uri: uri, classpath: ratClasspath) ant.taskdef(resource: 'org/apache/rat/anttasks/antlib.xml', uri: uri, classpath: ratClasspath)
def rat = NamespaceBuilder.newInstance(ant, uri) def rat = NamespaceBuilder.newInstance(ant, uri)
rat.report(format: 'xml', reportFile: xmlReport, addDefaultLicenseMatchers: true) {
ant.fileset(dir: "${project.projectDir}") {
includes.each { pattern -> ant.include(name: pattern) }
excludes.each { pattern -> ant.exclude(name: pattern) }
}
if (project.plugins.findPlugin(JavaPlugin)) { // Collect all output files for debugging.
def checkSets = [ String inputFileList = inputFileTrees.get().collectMany { fileTree ->
project.sourceSets.main.java.srcDirs, fileTree.asList()
project.sourceSets.test.java.srcDirs, }.sort().join("\n")
] project.file(reportFile.path.replaceAll('.xml$', '-filelist.txt')).setText(inputFileList, "UTF-8")
project.sourceSets.matching { it.name == 'tools' }.all { // Run rat via ant.
checkSets += project.sourceSets.tools.java.srcDirs rat.report(format: 'xml', reportFile: reportFile, addDefaultLicenseMatchers: true) {
// Pass all gradle file trees to the ant task (Gradle's internal adapters are used).
inputFileTrees.get().each { fileTree ->
fileTree.addToAntBuilder(ant, 'resources', FileCollection.AntType.ResourceCollection)
} }
checkSets.flatten().each { srcLocation ->
ant.fileset(dir: srcLocation, erroronmissingdir: false) {
srcExcludes.each { pattern -> ant.exclude(name: pattern) }
}
}
[
project.sourceSets.main.resources.srcDirs
].flatten().each { srcLocation ->
ant.fileset(dir: srcLocation, erroronmissingdir: false) {
ant.include(name: "META-INF/**")
}
}
}
// The license rules below were manually copied from lucene/common-build.xml, there is currently no mechanism to sync them
// BSD 4-clause stuff (is disallowed below) // BSD 4-clause stuff (is disallowed below)
substringMatcher(licenseFamilyCategory: "BSD4 ", licenseFamilyName: "Original BSD License (with advertising clause)") { substringMatcher(licenseFamilyCategory: "BSD4 ", licenseFamilyName: "Original BSD License (with advertising clause)") {
pattern(substring: "All advertising materials") pattern(substring: "All advertising materials")
@ -214,8 +202,8 @@ class RatTask extends DefaultTask {
} }
} }
def printUnknownFiles() { def printUnknownFiles(File reportFile) {
def ratXml = new XmlParser().parse(xmlReport) def ratXml = new XmlParser().parse(reportFile)
def errors = [] def errors = []
ratXml.resource.each { resource -> ratXml.resource.each { resource ->
if (resource.'license-approval'.@name[0] == "false") { if (resource.'license-approval'.@name[0] == "false") {
@ -229,14 +217,15 @@ class RatTask extends DefaultTask {
} }
@TaskAction @TaskAction
def rat() { def execute() {
def origEncoding = System.getProperty("file.encoding") def origEncoding = System.getProperty("file.encoding")
try { try {
generateXmlReport() File reportFile = xmlReport.get().asFile
printUnknownFiles() generateReport(reportFile)
printUnknownFiles(reportFile)
} finally { } finally {
if (System.getProperty("file.encoding") != origEncoding) { if (System.getProperty("file.encoding") != origEncoding) {
throw new GradleException("Insane: rat changed file.encoding to ${System.getProperty('file.encoding')}?") throw new GradleException("Something is wrong: Apache RAT changed file.encoding to ${System.getProperty('file.encoding')}?")
} }
} }
} }

View File

@ -1,3 +1,20 @@
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
# JRE Version Migration Guide # JRE Version Migration Guide
If possible, use the same JRE major version at both index and search time. If possible, use the same JRE major version at both index and search time.

View File

@ -1,3 +1,20 @@
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
# Apache Lucene Migration Guide # Apache Lucene Migration Guide
## NativeUnixDirectory removed and replaced by DirectIODirectory (LUCENE-8982) ## NativeUnixDirectory removed and replaced by DirectIODirectory (LUCENE-8982)

View File

@ -1,3 +1,20 @@
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
# Apache Lucene README file # Apache Lucene README file
## Introduction ## Introduction

View File

@ -1,3 +1,20 @@
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
# System Requirements # System Requirements
Apache Lucene runs on Java 11 or greater. Apache Lucene runs on Java 11 or greater.

View File

@ -1,3 +1,20 @@
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
# Index backwards compatibility # Index backwards compatibility
This README describes the approach to maintaining compatibility with indices This README describes the approach to maintaining compatibility with indices

View File

@ -1,3 +1,20 @@
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
![Apache Lucene Logo](lucene_green_300.png) ![Apache Lucene Logo](lucene_green_300.png)
# Apache Lucene™ ${project.version} Documentation # Apache Lucene™ ${project.version} Documentation

View File

@ -1,3 +1,18 @@
@rem Licensed to the Apache Software Foundation (ASF) under one or more
@rem contributor license agreements. See the NOTICE file distributed with
@rem this work for additional information regarding copyright ownership.
@rem The ASF licenses this file to You under the Apache License, Version 2.0
@rem (the "License"); you may not use this file except in compliance with
@rem the License. You may obtain a copy of the License at
@rem
@rem http://www.apache.org/licenses/LICENSE-2.0
@rem
@rem Unless required by applicable law or agreed to in writing, software
@rem distributed under the License is distributed on an "AS IS" BASIS,
@rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@rem See the License for the specific language governing permissions and
@rem limitations under the License.
@echo off @echo off
@setlocal enabledelayedexpansion @setlocal enabledelayedexpansion

View File

@ -1,5 +1,20 @@
#!/bin/bash #!/bin/bash
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
LUKE_HOME=$(cd $(dirname $0) && pwd) LUKE_HOME=$(cd $(dirname $0) && pwd)
cd ${LUKE_HOME} cd ${LUKE_HOME}

View File

@ -1,3 +1,20 @@
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
# Luke # Luke
This is Luke, Apache Lucene low-level index inspection and repair utility. This is Luke, Apache Lucene low-level index inspection and repair utility.

View File

@ -1,3 +1,20 @@
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<!-- <!--
This DTD builds on the <a href="LuceneCoreQuery.dtd.html">core Lucene XML syntax</a> and adds support for features found in the "contrib" section of the Lucene project. This DTD builds on the <a href="LuceneCoreQuery.dtd.html">core Lucene XML syntax</a> and adds support for features found in the "contrib" section of the Lucene project.

View File

@ -1,3 +1,20 @@
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<!-- <!--
<h3>Background</h3> <h3>Background</h3>
This DTD describes the XML syntax used to perform advanced searches using the core Lucene search engine. The motivation behind the XML query syntax is: This DTD describes the XML syntax used to perform advanced searches using the core Lucene search engine. The motivation behind the XML query syntax is: