LUCENE-9977: rat task corrections (proper up-to-date checks, cleanup and rewrite of the task itself). (#178)

This commit is contained in:
Dawid Weiss 2021-06-11 09:26:34 +02:00 committed by GitHub
parent 69ab1447a7
commit 3bedc0871e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
15 changed files with 361 additions and 138 deletions

View File

@ -1,3 +1,20 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
// Declare script dependency versions outside of palantir's
// version unification control. These are not our main dependencies
// but are reused in buildSrc and across applied scripts.

View File

@ -1,3 +1,20 @@
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
# Developer Scripts
This folder contains various useful scripts for developers, mostly related to

View File

@ -1,3 +1,20 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import gzip
import time

View File

@ -17,6 +17,7 @@
import groovy.xml.NamespaceBuilder
// Configure rat dependencies for use in the custom task.
configure(rootProject) {
configurations {
ratDeps
@ -27,139 +28,126 @@ configure(rootProject) {
}
}
// Configure the rat validation task and all scanned directories.
allprojects {
task("rat", type: RatTask) {
group = 'Verification'
description = 'Runs Apache Rat checks.'
def defaultScanFileTree = project.fileTree(projectDir, {
// Don't check under the project's build folder.
exclude project.buildDir.name
// Exclude any generated stuff.
exclude "src/generated"
// Don't check any of the subprojects - they have their own rat tasks.
exclude subprojects.collect { it.projectDir.name }
// At the module scope we only check selected file patterns as folks have various .gitignore-d resources
// generated by IDEs, etc.
include "**/*.gradle"
include "**/*.xml"
include "**/*.md"
include "**/*.py"
include "**/*.sh"
include "**/*.bat"
// Include selected patterns from any source folders. We could make this
// relative to source sets but it seems to be of little value - all our source sets
// live under 'src' anyway.
include "src/**"
exclude "src/**/*.png"
exclude "src/**/*.txt"
exclude "src/**/*.zip"
exclude "src/**/*.properties"
exclude "src/**/*.utf8"
// Conditionally apply module-specific patterns. We do it here instead
// of reconfiguring each project because the provider can be made lazy
// and it's easier to manage this way.
switch (project.path) {
case ":":
include "gradlew"
include "gradlew.bat"
exclude ".gradle"
exclude ".idea"
exclude ".muse"
exclude ".git"
// Exclude github stuff (templates, workflows).
exclude ".github"
// The root project also includes patterns for the bootstrap (buildSrc) and composite
// projects. Include their sources in the scan.
include "buildSrc/src/**"
include "dev-tools/missing-doclet/src/**"
break
case ":lucene:analysis:morfologik":
exclude "src/**/*.info"
exclude "src/**/*.input"
break
case ":lucene:analysis:opennlp":
exclude "src/**/en-test-lemmas.dict"
break
case ":lucene:test-framework":
exclude "src/**/europarl.lines.txt.seek"
break
case ":lucene:analysis:common":
exclude "src/**/*.aff"
exclude "src/**/*.dic"
exclude "src/**/*.good"
exclude "src/**/*.sug"
exclude "src/**/*.wrong"
exclude "src/**/charfilter/*.htm*"
exclude "src/**/*LuceneResourcesWikiPage.html"
exclude "src/**/*.rslp"
break
case ":lucene:benchmark":
exclude "data/"
break
}
})
inputFileTrees.add(defaultScanFileTree)
}
}
configure(rootProject) {
rat {
includes += [
"buildSrc/**/*.java",
"gradle/**/*.gradle",
"lucene/tools/forbiddenApis/**",
"lucene/tools/prettify/**",
]
excludes += [
// Unclear if this needs ASF header, depends on how much was copied from ElasticSearch
"**/ErrorReportingTestListener.java"
]
}
}
configure(project(":lucene:analysis:common")) {
rat {
srcExcludes += [
"**/*.aff",
"**/*.dic",
"**/*.wrong",
"**/*.good",
"**/*.sug",
"**/charfilter/*.htm*",
"**/*LuceneResourcesWikiPage.html"
]
}
}
configure(project(":lucene:analysis:kuromoji")) {
rat {
srcExcludes += [
// whether rat detects this as binary or not is platform dependent?!
"**/bocchan.utf-8"
]
}
}
configure(project(":lucene:analysis:opennlp")) {
rat {
excludes += [
"src/tools/test-model-data/*.txt",
]
}
}
configure(project(":lucene:highlighter")) {
rat {
srcExcludes += [
"**/CambridgeMA.utf8"
]
}
}
configure(project(":lucene:suggest")) {
rat {
srcExcludes += [
"**/Top50KWiki.utf8",
"**/stop-snowball.txt"
]
}
}
// Structure inspired by existing task from Apache Kafka, heavily modified since then.
/**
* An Apache RAT adapter that validates whether files contain acceptable licenses.
*/
class RatTask extends DefaultTask {
@Input
List<String> includes = [
"*.gradle",
"*.xml",
"src/tools/**"
]
@Input
List<String> excludes = []
@Input
List<String> srcExcludes = [
"**/TODO",
"**/*.txt",
"**/*.md",
"**/*.iml",
"build/**"
]
@InputFiles
ListProperty<ConfigurableFileTree> inputFileTrees = project.objects.listProperty(ConfigurableFileTree)
@OutputFile
def xmlReport = new File(new File(project.buildDir, 'rat'), 'rat-report.xml')
RegularFileProperty xmlReport = project.objects.fileProperty().convention(
project.layout.buildDirectory.file("rat/rat-report.xml"))
def generateXmlReport() {
def generateReport(File reportFile) {
// Set up ant rat task.
def uri = 'antlib:org.apache.rat.anttasks'
def ratClasspath = project.rootProject.configurations.ratDeps.asPath
ant.taskdef(resource: 'org/apache/rat/anttasks/antlib.xml', uri: uri, classpath: ratClasspath)
def rat = NamespaceBuilder.newInstance(ant, uri)
rat.report(format: 'xml', reportFile: xmlReport, addDefaultLicenseMatchers: true) {
ant.fileset(dir: "${project.projectDir}") {
includes.each { pattern -> ant.include(name: pattern) }
excludes.each { pattern -> ant.exclude(name: pattern) }
// Write the list of all scanned input files next to the report, for debugging.
String inputFileList = inputFileTrees.get().collectMany { fileTree ->
fileTree.asList()
}.sort().join("\n")
project.file(reportFile.path.replaceAll('.xml$', '-filelist.txt')).setText(inputFileList, "UTF-8")
// Run rat via ant.
rat.report(format: 'xml', reportFile: reportFile, addDefaultLicenseMatchers: true) {
// Pass all gradle file trees to the ant task (Gradle's internal adapters are used).
inputFileTrees.get().each { fileTree ->
fileTree.addToAntBuilder(ant, 'resources', FileCollection.AntType.ResourceCollection)
}
if (project.plugins.findPlugin(JavaPlugin)) {
def checkSets = [
project.sourceSets.main.java.srcDirs,
project.sourceSets.test.java.srcDirs,
]
project.sourceSets.matching { it.name == 'tools' }.all {
checkSets += project.sourceSets.tools.java.srcDirs
}
checkSets.flatten().each { srcLocation ->
ant.fileset(dir: srcLocation, erroronmissingdir: false) {
srcExcludes.each { pattern -> ant.exclude(name: pattern) }
}
}
[
project.sourceSets.main.resources.srcDirs
].flatten().each { srcLocation ->
ant.fileset(dir: srcLocation, erroronmissingdir: false) {
ant.include(name: "META-INF/**")
}
}
}
// The license rules below were manually copied from lucene/common-build.xml, there is currently no mechanism to sync them
// BSD 4-clause stuff (is disallowed below)
substringMatcher(licenseFamilyCategory: "BSD4 ", licenseFamilyName: "Original BSD License (with advertising clause)") {
pattern(substring: "All advertising materials")
@ -188,7 +176,7 @@ class RatTask extends DefaultTask {
// ICU license
pattern(substring: "Permission is hereby granted, free of charge, to any person obtaining a copy")
// ui-grid
pattern(substring: " ; License: MIT")
pattern(substring: " ; License: MIT")
}
// Apache
@ -214,8 +202,8 @@ class RatTask extends DefaultTask {
}
}
def printUnknownFiles() {
def ratXml = new XmlParser().parse(xmlReport)
def printUnknownFiles(File reportFile) {
def ratXml = new XmlParser().parse(reportFile)
def errors = []
ratXml.resource.each { resource ->
if (resource.'license-approval'.@name[0] == "false") {
@ -224,19 +212,20 @@ class RatTask extends DefaultTask {
}
if (errors) {
throw new GradleException("Found " + errors.size() + " file(s) with errors:\n" +
errors.collect{ msg -> " - ${msg}" }.join("\n"))
errors.collect{ msg -> " - ${msg}" }.join("\n"))
}
}
@TaskAction
def rat() {
def execute() {
def origEncoding = System.getProperty("file.encoding")
try {
generateXmlReport()
printUnknownFiles()
File reportFile = xmlReport.get().asFile
generateReport(reportFile)
printUnknownFiles(reportFile)
} finally {
if (System.getProperty("file.encoding") != origEncoding) {
throw new GradleException("Insane: rat changed file.encoding to ${System.getProperty('file.encoding')}?")
throw new GradleException("Something is wrong: Apache RAT changed file.encoding to ${System.getProperty('file.encoding')}?")
}
}
}

View File

@ -1,3 +1,20 @@
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
# JRE Version Migration Guide
If possible, use the same JRE major version at both index and search time.

View File

@ -1,3 +1,20 @@
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
# Apache Lucene Migration Guide
## NativeUnixDirectory removed and replaced by DirectIODirectory (LUCENE-8982)

View File

@ -1,3 +1,20 @@
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
# Apache Lucene README file
## Introduction

View File

@ -1,3 +1,20 @@
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
# System Requirements
Apache Lucene runs on Java 11 or greater.

View File

@ -1,3 +1,20 @@
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
# Index backwards compatibility
This README describes the approach to maintaining compatibility with indices

View File

@ -1,3 +1,20 @@
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
![Apache Lucene Logo](lucene_green_300.png)
# Apache Lucene™ ${project.version} Documentation

View File

@ -1,13 +1,28 @@
@echo off
@setlocal enabledelayedexpansion
cd /d %~dp0
set JAVA_OPTIONS=%JAVA_OPTIONS% -Xmx1024m -Xms512m -XX:MaxMetaspaceSize=256m
set CLASSPATHS=.\*;.\lib\*;..\core\*;..\codecs\*;..\backward-codecs\*;..\queries\*;..\queryparser\*;..\suggest\*;..\misc\*
for /d %%A in (..\analysis\*) do (
set "CLASSPATHS=!CLASSPATHS!;%%A\*;%%A\lib\*"
)
start javaw -cp %CLASSPATHS% %JAVA_OPTIONS% org.apache.lucene.luke.app.desktop.LukeMain
@rem Licensed to the Apache Software Foundation (ASF) under one or more
@rem contributor license agreements. See the NOTICE file distributed with
@rem this work for additional information regarding copyright ownership.
@rem The ASF licenses this file to You under the Apache License, Version 2.0
@rem (the "License"); you may not use this file except in compliance with
@rem the License. You may obtain a copy of the License at
@rem
@rem http://www.apache.org/licenses/LICENSE-2.0
@rem
@rem Unless required by applicable law or agreed to in writing, software
@rem distributed under the License is distributed on an "AS IS" BASIS,
@rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@rem See the License for the specific language governing permissions and
@rem limitations under the License.
@echo off
@setlocal enabledelayedexpansion
cd /d %~dp0
set JAVA_OPTIONS=%JAVA_OPTIONS% -Xmx1024m -Xms512m -XX:MaxMetaspaceSize=256m
set CLASSPATHS=.\*;.\lib\*;..\core\*;..\codecs\*;..\backward-codecs\*;..\queries\*;..\queryparser\*;..\suggest\*;..\misc\*
for /d %%A in (..\analysis\*) do (
set "CLASSPATHS=!CLASSPATHS!;%%A\*;%%A\lib\*"
)
start javaw -cp %CLASSPATHS% %JAVA_OPTIONS% org.apache.lucene.luke.app.desktop.LukeMain

View File

@ -1,5 +1,20 @@
#!/bin/bash
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
LUKE_HOME=$(cd $(dirname $0) && pwd)
cd ${LUKE_HOME}

View File

@ -1,3 +1,20 @@
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
# Luke
This is Luke, Apache Lucene low-level index inspection and repair utility.

View File

@ -1,4 +1,21 @@
<!--
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
the "License"); you may not use this file except in compliance with
(the "License"); you may not use this file except in compliance with
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<!--
This DTD builds on the <a href="LuceneCoreQuery.dtd.html">core Lucene XML syntax</a> and adds support for features found in the "contrib" section of the Lucene project.
CorePlusExtensionsParser.java is the Java class that encapsulates this parser behaviour.

View File

@ -1,3 +1,20 @@
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<!--
<h3>Background</h3>
This DTD describes the XML syntax used to perform advanced searches using the core Lucene search engine. The motivation behind the XML query syntax is: