From 3bedc0871e96429fb7eb0f6b9fb3f97ffa3e10d2 Mon Sep 17 00:00:00 2001 From: Dawid Weiss Date: Fri, 11 Jun 2021 09:26:34 +0200 Subject: [PATCH] LUCENE-9977: rat task corrections (proper up-to-date checks, cleanup and rewrite of the task itself). (#178) --- buildSrc/scriptDepVersions.gradle | 17 ++ dev-tools/scripts/README.md | 17 ++ dev-tools/scripts/create_line_file_docs.py | 17 ++ gradle/validation/rat-sources.gradle | 237 +++++++++--------- lucene/JRE_VERSION_MIGRATION.md | 17 ++ lucene/MIGRATE.md | 17 ++ lucene/README.md | 17 ++ lucene/SYSTEM_REQUIREMENTS.md | 17 ++ lucene/backward-codecs/README.md | 17 ++ .../src/markdown/index.template.md | 17 ++ lucene/luke/bin/luke.bat | 41 ++- lucene/luke/bin/luke.sh | 15 ++ lucene/luke/src/distribution/README.md | 17 ++ .../queryparser/xml/LuceneContribQuery.dtd | 19 +- .../queryparser/xml/LuceneCoreQuery.dtd | 17 ++ 15 files changed, 361 insertions(+), 138 deletions(-) diff --git a/buildSrc/scriptDepVersions.gradle b/buildSrc/scriptDepVersions.gradle index 5578bc8f23b..5eb00058671 100644 --- a/buildSrc/scriptDepVersions.gradle +++ b/buildSrc/scriptDepVersions.gradle @@ -1,3 +1,20 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + // Declare script dependency versions outside of palantir's // version unification control. These are not our main dependencies // but are reused in buildSrc and across applied scripts. diff --git a/dev-tools/scripts/README.md b/dev-tools/scripts/README.md index 94e69645177..fceee98849d 100644 --- a/dev-tools/scripts/README.md +++ b/dev-tools/scripts/README.md @@ -1,3 +1,20 @@ + + # Developer Scripts This folder contains various useful scripts for developers, mostly related to diff --git a/dev-tools/scripts/create_line_file_docs.py b/dev-tools/scripts/create_line_file_docs.py index 875cd6520f0..55417d4634e 100644 --- a/dev-tools/scripts/create_line_file_docs.py +++ b/dev-tools/scripts/create_line_file_docs.py @@ -1,3 +1,20 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import os import gzip import time diff --git a/gradle/validation/rat-sources.gradle b/gradle/validation/rat-sources.gradle index 29bebda37fc..19810f6e785 100644 --- a/gradle/validation/rat-sources.gradle +++ b/gradle/validation/rat-sources.gradle @@ -17,6 +17,7 @@ import groovy.xml.NamespaceBuilder +// Configure rat dependencies for use in the custom task. 
configure(rootProject) { configurations { ratDeps @@ -27,139 +28,126 @@ configure(rootProject) { } } +// Configure the rat validation task and all scanned directories. allprojects { task("rat", type: RatTask) { group = 'Verification' description = 'Runs Apache Rat checks.' + + def defaultScanFileTree = project.fileTree(projectDir, { + // Don't check under the project's build folder. + exclude project.buildDir.name + + // Exclude any generated stuff. + exclude "src/generated" + + // Don't check any of the subprojects - they have their own rat tasks. + exclude subprojects.collect { it.projectDir.name } + + // At the module scope we only check selected file patterns as folks have various .gitignore-d resources + // generated by IDEs, etc. + include "**/*.gradle" + include "**/*.xml" + include "**/*.md" + include "**/*.py" + include "**/*.sh" + include "**/*.bat" + + // Include selected patterns from any source folders. We could make this + // relative to source sets but it seems to be of little value - all our source sets + // live under 'src' anyway. + include "src/**" + exclude "src/**/*.png" + exclude "src/**/*.txt" + exclude "src/**/*.zip" + exclude "src/**/*.properties" + exclude "src/**/*.utf8" + + // Conditionally apply module-specific patterns. We do it here instead + // of reconfiguring each project because the provider can be made lazy + // and it's easier to manage this way. + switch (project.path) { + case ":": + include "gradlew" + include "gradlew.bat" + exclude ".gradle" + exclude ".idea" + exclude ".muse" + exclude ".git" + + // Exclude github stuff (templates, workflows). + exclude ".github" + + // The root project also includes patterns for the bootstrap (buildSrc) and composite + // projects. Include their sources in the scan. 
+ include "buildSrc/src/**" + include "dev-tools/missing-doclet/src/**" + break + + case ":lucene:analysis:morfologik": + exclude "src/**/*.info" + exclude "src/**/*.input" + break + + case ":lucene:analysis:opennlp": + exclude "src/**/en-test-lemmas.dict" + break + + case ":lucene:test-framework": + exclude "src/**/europarl.lines.txt.seek" + break + + case ":lucene:analysis:common": + exclude "src/**/*.aff" + exclude "src/**/*.dic" + exclude "src/**/*.good" + exclude "src/**/*.sug" + exclude "src/**/*.wrong" + exclude "src/**/charfilter/*.htm*" + exclude "src/**/*LuceneResourcesWikiPage.html" + exclude "src/**/*.rslp" + break + + case ":lucene:benchmark": + exclude "data/" + break + } + }) + inputFileTrees.add(defaultScanFileTree) } } -configure(rootProject) { - rat { - includes += [ - "buildSrc/**/*.java", - "gradle/**/*.gradle", - "lucene/tools/forbiddenApis/**", - "lucene/tools/prettify/**", - ] - excludes += [ - // Unclear if this needs ASF header, depends on how much was copied from ElasticSearch - "**/ErrorReportingTestListener.java" - ] - } -} - -configure(project(":lucene:analysis:common")) { - rat { - srcExcludes += [ - "**/*.aff", - "**/*.dic", - "**/*.wrong", - "**/*.good", - "**/*.sug", - "**/charfilter/*.htm*", - "**/*LuceneResourcesWikiPage.html" - ] - } -} - -configure(project(":lucene:analysis:kuromoji")) { - rat { - srcExcludes += [ - // whether rat detects this as binary or not is platform dependent?! - "**/bocchan.utf-8" - ] - } -} - -configure(project(":lucene:analysis:opennlp")) { - rat { - excludes += [ - "src/tools/test-model-data/*.txt", - ] - } -} - -configure(project(":lucene:highlighter")) { - rat { - srcExcludes += [ - "**/CambridgeMA.utf8" - ] - } -} - -configure(project(":lucene:suggest")) { - rat { - srcExcludes += [ - "**/Top50KWiki.utf8", - "**/stop-snowball.txt" - ] - } -} - -// Structure inspired by existing task from Apache Kafka, heavily modified since then. 
+/** + * An Apache RAT adapter that validates whether files contain acceptable licenses. + */ class RatTask extends DefaultTask { - @Input - List includes = [ - "*.gradle", - "*.xml", - "src/tools/**" - ] - - @Input - List excludes = [] - - @Input - List srcExcludes = [ - "**/TODO", - "**/*.txt", - "**/*.md", - "**/*.iml", - "build/**" - ] + @InputFiles + ListProperty inputFileTrees = project.objects.listProperty(ConfigurableFileTree) @OutputFile - def xmlReport = new File(new File(project.buildDir, 'rat'), 'rat-report.xml') + RegularFileProperty xmlReport = project.objects.fileProperty().convention( + project.layout.buildDirectory.file("rat/rat-report.xml")) - def generateXmlReport() { + def generateReport(File reportFile) { + // Set up ant rat task. def uri = 'antlib:org.apache.rat.anttasks' def ratClasspath = project.rootProject.configurations.ratDeps.asPath ant.taskdef(resource: 'org/apache/rat/anttasks/antlib.xml', uri: uri, classpath: ratClasspath) - def rat = NamespaceBuilder.newInstance(ant, uri) - rat.report(format: 'xml', reportFile: xmlReport, addDefaultLicenseMatchers: true) { - ant.fileset(dir: "${project.projectDir}") { - includes.each { pattern -> ant.include(name: pattern) } - excludes.each { pattern -> ant.exclude(name: pattern) } + + // Collect all output files for debugging. + String inputFileList = inputFileTrees.get().collectMany { fileTree -> + fileTree.asList() + }.sort().join("\n") + project.file(reportFile.path.replaceAll('.xml$', '-filelist.txt')).setText(inputFileList, "UTF-8") + + // Run rat via ant. + rat.report(format: 'xml', reportFile: reportFile, addDefaultLicenseMatchers: true) { + // Pass all gradle file trees to the ant task (Gradle's internal adapters are used). 
+ inputFileTrees.get().each { fileTree -> + fileTree.addToAntBuilder(ant, 'resources', FileCollection.AntType.ResourceCollection) } - if (project.plugins.findPlugin(JavaPlugin)) { - def checkSets = [ - project.sourceSets.main.java.srcDirs, - project.sourceSets.test.java.srcDirs, - ] - - project.sourceSets.matching { it.name == 'tools' }.all { - checkSets += project.sourceSets.tools.java.srcDirs - } - - checkSets.flatten().each { srcLocation -> - ant.fileset(dir: srcLocation, erroronmissingdir: false) { - srcExcludes.each { pattern -> ant.exclude(name: pattern) } - } - } - - [ - project.sourceSets.main.resources.srcDirs - ].flatten().each { srcLocation -> - ant.fileset(dir: srcLocation, erroronmissingdir: false) { - ant.include(name: "META-INF/**") - } - } - } - - // The license rules below were manually copied from lucene/common-build.xml, there is currently no mechanism to sync them - // BSD 4-clause stuff (is disallowed below) substringMatcher(licenseFamilyCategory: "BSD4 ", licenseFamilyName: "Original BSD License (with advertising clause)") { pattern(substring: "All advertising materials") @@ -188,7 +176,7 @@ class RatTask extends DefaultTask { // ICU license pattern(substring: "Permission is hereby granted, free of charge, to any person obtaining a copy") // ui-grid - pattern(substring: " ; License: MIT") + pattern(substring: " ; License: MIT") } // Apache @@ -214,8 +202,8 @@ class RatTask extends DefaultTask { } } - def printUnknownFiles() { - def ratXml = new XmlParser().parse(xmlReport) + def printUnknownFiles(File reportFile) { + def ratXml = new XmlParser().parse(reportFile) def errors = [] ratXml.resource.each { resource -> if (resource.'license-approval'.@name[0] == "false") { @@ -224,19 +212,20 @@ class RatTask extends DefaultTask { } if (errors) { throw new GradleException("Found " + errors.size() + " file(s) with errors:\n" + - errors.collect{ msg -> " - ${msg}" }.join("\n")) + errors.collect{ msg -> " - ${msg}" }.join("\n")) } } @TaskAction - def 
rat() { + def execute() { def origEncoding = System.getProperty("file.encoding") try { - generateXmlReport() - printUnknownFiles() + File reportFile = xmlReport.get().asFile + generateReport(reportFile) + printUnknownFiles(reportFile) } finally { if (System.getProperty("file.encoding") != origEncoding) { - throw new GradleException("Insane: rat changed file.encoding to ${System.getProperty('file.encoding')}?") + throw new GradleException("Something is wrong: Apache RAT changed file.encoding to ${System.getProperty('file.encoding')}?") } } } diff --git a/lucene/JRE_VERSION_MIGRATION.md b/lucene/JRE_VERSION_MIGRATION.md index 2d4f6026214..90196d22ae1 100644 --- a/lucene/JRE_VERSION_MIGRATION.md +++ b/lucene/JRE_VERSION_MIGRATION.md @@ -1,3 +1,20 @@ + + # JRE Version Migration Guide If possible, use the same JRE major version at both index and search time. diff --git a/lucene/MIGRATE.md b/lucene/MIGRATE.md index e32cb2c3781..81ae114d17b 100644 --- a/lucene/MIGRATE.md +++ b/lucene/MIGRATE.md @@ -1,3 +1,20 @@ + + # Apache Lucene Migration Guide ## NativeUnixDirectory removed and replaced by DirectIODirectory (LUCENE-8982) diff --git a/lucene/README.md b/lucene/README.md index 92e1eab9218..b2d1620db16 100644 --- a/lucene/README.md +++ b/lucene/README.md @@ -1,3 +1,20 @@ + + # Apache Lucene README file ## Introduction diff --git a/lucene/SYSTEM_REQUIREMENTS.md b/lucene/SYSTEM_REQUIREMENTS.md index 5ea6b3cdf8c..3e442761057 100644 --- a/lucene/SYSTEM_REQUIREMENTS.md +++ b/lucene/SYSTEM_REQUIREMENTS.md @@ -1,3 +1,20 @@ + + # System Requirements Apache Lucene runs on Java 11 or greater. 
diff --git a/lucene/backward-codecs/README.md b/lucene/backward-codecs/README.md index 9c1f617702a..a96fd781f65 100644 --- a/lucene/backward-codecs/README.md +++ b/lucene/backward-codecs/README.md @@ -1,3 +1,20 @@ + + # Index backwards compatibility This README describes the approach to maintaining compatibility with indices diff --git a/lucene/documentation/src/markdown/index.template.md b/lucene/documentation/src/markdown/index.template.md index cfcb11796e0..97b3ec81577 100644 --- a/lucene/documentation/src/markdown/index.template.md +++ b/lucene/documentation/src/markdown/index.template.md @@ -1,3 +1,20 @@ + + ![Apache Lucene Logo](lucene_green_300.png) # Apache Lucene™ ${project.version} Documentation diff --git a/lucene/luke/bin/luke.bat b/lucene/luke/bin/luke.bat index 4d83d8bf319..2f3fc9757b8 100644 --- a/lucene/luke/bin/luke.bat +++ b/lucene/luke/bin/luke.bat @@ -1,13 +1,28 @@ -@echo off -@setlocal enabledelayedexpansion - -cd /d %~dp0 - -set JAVA_OPTIONS=%JAVA_OPTIONS% -Xmx1024m -Xms512m -XX:MaxMetaspaceSize=256m - -set CLASSPATHS=.\*;.\lib\*;..\core\*;..\codecs\*;..\backward-codecs\*;..\queries\*;..\queryparser\*;..\suggest\*;..\misc\* -for /d %%A in (..\analysis\*) do ( - set "CLASSPATHS=!CLASSPATHS!;%%A\*;%%A\lib\*" -) - -start javaw -cp %CLASSPATHS% %JAVA_OPTIONS% org.apache.lucene.luke.app.desktop.LukeMain +@rem Licensed to the Apache Software Foundation (ASF) under one or more +@rem contributor license agreements. See the NOTICE file distributed with +@rem this work for additional information regarding copyright ownership. +@rem The ASF licenses this file to You under the Apache License, Version 2.0 +@rem (the "License"); you may not use this file except in compliance with +@rem the License. 
You may obtain a copy of the License at +@rem +@rem http://www.apache.org/licenses/LICENSE-2.0 +@rem +@rem Unless required by applicable law or agreed to in writing, software +@rem distributed under the License is distributed on an "AS IS" BASIS, +@rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +@rem See the License for the specific language governing permissions and +@rem limitations under the License. + +@echo off +@setlocal enabledelayedexpansion + +cd /d %~dp0 + +set JAVA_OPTIONS=%JAVA_OPTIONS% -Xmx1024m -Xms512m -XX:MaxMetaspaceSize=256m + +set CLASSPATHS=.\*;.\lib\*;..\core\*;..\codecs\*;..\backward-codecs\*;..\queries\*;..\queryparser\*;..\suggest\*;..\misc\* +for /d %%A in (..\analysis\*) do ( + set "CLASSPATHS=!CLASSPATHS!;%%A\*;%%A\lib\*" +) + +start javaw -cp %CLASSPATHS% %JAVA_OPTIONS% org.apache.lucene.luke.app.desktop.LukeMain diff --git a/lucene/luke/bin/luke.sh b/lucene/luke/bin/luke.sh index 7c7d9191056..814eefb7536 100755 --- a/lucene/luke/bin/luke.sh +++ b/lucene/luke/bin/luke.sh @@ -1,5 +1,20 @@ #!/bin/bash +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ LUKE_HOME=$(cd $(dirname $0) && pwd) cd ${LUKE_HOME} diff --git a/lucene/luke/src/distribution/README.md b/lucene/luke/src/distribution/README.md index da12b979202..b0672ae2bca 100644 --- a/lucene/luke/src/distribution/README.md +++ b/lucene/luke/src/distribution/README.md @@ -1,3 +1,20 @@ + + # Luke This is Luke, Apache Lucene low-level index inspection and repair utility. diff --git a/lucene/queryparser/src/resources/org/apache/lucene/queryparser/xml/LuceneContribQuery.dtd b/lucene/queryparser/src/resources/org/apache/lucene/queryparser/xml/LuceneContribQuery.dtd index 881a0a15aae..193907e3627 100644 --- a/lucene/queryparser/src/resources/org/apache/lucene/queryparser/xml/LuceneContribQuery.dtd +++ b/lucene/queryparser/src/resources/org/apache/lucene/queryparser/xml/LuceneContribQuery.dtd @@ -1,4 +1,21 @@ - + + +