mirror of
synced 2025-03-06 08:19:23 +00:00
276 lines
9.6 KiB
276 lines
9.6 KiB
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
import java.nio.charset.CharacterCodingException;
import java.nio.charset.CodingErrorAction;
import java.nio.charset.StandardCharsets;
import org.apache.rat.Defaults
import org.apache.rat.document.impl.FileDocument
import org.apache.rat.api.MetaData
import javax.inject.Inject;
import org.gradle.internal.logging.progress.ProgressLoggerFactory
import org.gradle.internal.logging.progress.ProgressLogger
// Build script classpath setup: adds the artifact referenced by deps.rat
// (presumably Apache RAT, given the org.apache.rat imports above) to the
// classpath used to compile and run this script.
// NOTE(review): the closing braces of these closures and any repository
// declarations are not visible in this view.
buildscript {
repositories {
dependencies {
classpath deps.rat
// NOTE(review): the entries of this list are not visible in this view.
// Presumably these are the file extensions to scan, feeding the
// "**/*.${it}" include pattern of the validation task - confirm.
def extensions = [
// Create source validation task local to each project
allprojects {
// Registers a 'validateSourcePatterns' task of type ValidateSourcePatternsTask
// in every project and configures which files it scans.
task validateSourcePatterns(type: ValidateSourcePatternsTask) { task ->
group = 'Verification'
description = 'Validate Source Patterns'
// The files to scan: a tree rooted at this project's directory, narrowed by
// the include/exclude patterns below.
sourceFiles = fileTree(projectDir) {
// NOTE(review): `it` here and in the first exclude below is bound by enclosing
// iterations that are not visible in this view.
include "**/*.${it}"
// Don't go into child projects (scanned separately).
exclude "${it}/**"
// default excludes.
exclude '**/build/**'
exclude '**/.idea/**'
exclude '**/.gradle/**'
if (project == rootProject) {
// ourselves :-)
// (the root project skips this validation script itself)
exclude 'gradle/validation/validate-source-patterns.gradle'
} else {
// ignore txt files in source resources and tests.
exclude 'src/**/*.txt'
// Add source validation to per-project checks as well.
check.dependsOn validateSourcePatterns
// Ensure validation runs prior to any compilation task. This also means
// no executable code can leak out to other modules.
tasks.withType(JavaCompile).configureEach {
mustRunAfter validateSourcePatterns
// Additional excludes applied to every ValidateSourcePatternsTask of the
// ':lucene:benchmark' project.
configure(project(':lucene:benchmark')) {
project.tasks.withType(ValidateSourcePatternsTask) {
// Skip everything under the project's 'data' and 'work' directories.
sourceFiles.exclude 'data/**'
sourceFiles.exclude 'work/**'
// Known .txt offenders.
sourceFiles.exclude '**/reuters.first20.lines.txt', '**/trecQRels.txt'
// Gradle task that scans `sourceFiles` for forbidden text patterns and
// license-header placement problems, collecting violations and failing the
// build if any are found.
class ValidateSourcePatternsTask extends DefaultTask {
// Shared lock object: license matching synchronizes on it (see the
// "rat is not thread safe" note inside check()). Being static, it is
// common to all instances of this task class.
private static final Object ratBug = new Object()
// Factory used by check() to create the progress logger for a run.
private ProgressLoggerFactory progressLoggerFactory
// The set of files this task validates; assigned by the build script.
FileTree sourceFiles
// Stores the progress logger factory passed in at construction time.
// NOTE(review): any annotations on this constructor or on the members
// above are not visible in this view.
ValidateSourcePatternsTask(ProgressLoggerFactory progressLoggerFactory) {
this.progressLoggerFactory = progressLoggerFactory
// Task entry point: defines the pattern tables and helper closures, scans
// every file in `sourceFiles`, and throws a GradleException at the end if
// any violation was collected.
public void check() {
// Patterns searched for in every scanned file. Key: compiled regex; value:
// the description used in the violation message. Only the first match per
// pattern and file is reported. In these dollar-slashy strings `$$` is the
// escape for a literal `$`.
// NOTE(review): the closing bracket of this map is not visible in this view.
def invalidPatterns = [
(~$/@author\b/$) : '@author javadoc tag',
(~$/(?i)\bno(n|)commit\b/$) : 'nocommit',
(~$/\bTOOD:/$) : 'TOOD instead TODO',
(~$/\t/$) : 'tabs instead spaces',
(~$/[\u202A-\u202E\u2066-\u2069]/$) : 'misuse of RTL/LTR (https://trojansource.codes)',
(~$/\Q/**\E((?:\s)|(?:\*))*\Q{@inheritDoc}\E((?:\s)|(?:\*))*\Q*/\E/$) : '{@inheritDoc} on its own is unnecessary',
(~$/\$$(?:LastChanged)?Date\b/$) : 'svn keyword',
(~$/\$$(?:(?:LastChanged)?Revision|Rev)\b/$) : 'svn keyword',
(~$/\$$(?:LastChangedBy|Author)\b/$) : 'svn keyword',
(~$/\$$(?:Head)?URL\b/$) : 'svn keyword',
(~$/\$$Id\b/$) : 'svn keyword',
(~$/\$$Header\b/$) : 'svn keyword',
(~$/\$$Source\b/$) : 'svn keyword',
(~$/[\u200B\uFEFF]/$) : 'UTF-8 byte order mark or other zero-width codepoints',
(~$/import java\.lang\.\w+;/$) : 'java.lang import is unnecessary',
// Python and others merrily use var declarations, this is a problem _only_ in Java at least for 8x where we're forbidding var declarations
// Same key/value layout as invalidPatterns, but applied only to files whose
// name ends with '.java'.
def invalidJavaOnlyPatterns = [
(~$/\n\s*var\s+.*=.*<>.*/$) : 'Diamond operators should not be used with var',
(~$/import\s+\w+(\.\w+)\.\*;/$) : 'Expand wildcard imports into explicit imports'
// Collected violation messages; a TreeSet, so they are kept sorted and
// de-duplicated for the final report.
def violations = new TreeSet();
// Records a single violation. `f` is the offending file and `name` describes
// what is wrong with it. The formatted "<file>: <description>" message is
// stored in `violations`, the set inspected at the end of check() to decide
// whether the task fails; without storing it no violation could ever fail
// the build.
def reportViolation = { f, name ->
  String msg = String.format(Locale.ROOT, "%s: %s", f, name)
  violations.add(msg)
}
// Helper patterns used by the license and structure checks below. For the
// three comment patterns, group 1 captures the comment body (lazily, across
// lines).
// A '/** ... */' comment that starts at the beginning of a line.
def javadocsPattern = ~$/(?sm)^\Q/**\E(.*?)\Q*/\E/$;
// A '/* ... */' comment that starts at the beginning of a line (this also
// matches '/**' comments).
def javaCommentPattern = ~$/(?sm)^\Q/*\E(.*?)\Q*/\E/$;
// An XML '<!-- ... -->' comment anywhere in the text.
def xmlCommentPattern = ~$/(?sm)\Q<!--\E(.*?)\Q-->\E/$;
// Splits text on runs of CR/LF characters (blank lines yield no empty entries
// in between).
def lineSplitter = ~$/[\r\n]+/$;
// A 'package org.apache...;' declaration on its own line.
def packagePattern = ~$/(?m)^\s*package\s+org\.apache.*;/$;
// The start of an XML element: '<' followed by an ASCII letter.
def xmlTagPattern = ~$/(?m)\s*<[a-zA-Z].*/$;
// A comment line of the form '* @lucene.spi {@value #NAME}'.
def validSPINameJavadocTag = ~$/(?s)\s*\*\s*@lucene\.spi\s+\{@value #NAME\}/$;
// Returns true if any line of the comment body captured in group 1 of
// `matcher` is matched by a freshly created default RAT matcher for
// `ratDocument`. The whole check runs while holding the `ratBug` lock.
// NOTE(review): the closing braces of this closure are not visible in this view.
def isLicense = { matcher, ratDocument ->
// See LUCENE-10419 - rat is not thread safe.
synchronized (ratBug) {
def licenseMatcher = Defaults.createDefaultMatcher();
return lineSplitter.split(matcher.group(1)).any { licenseMatcher.match(ratDocument, it) }
// Checks that a license comment appears before the first occurrence of
// `contentPattern` in `text`.
//   f              - file being checked (used only for reporting)
//   description    - human-readable name of the content, used in the message
//   contentPattern - regex locating the content that must follow the license
//   commentPattern - regex locating candidate comments (group 1 = body)
//   text           - full file contents
//   ratDocument    - RAT document passed through to isLicense
// Nothing is checked when `contentPattern` does not occur in `text`.
// NOTE(review): the closing braces are not visible in this view, so which
// statements belong to the `else` branch cannot be confirmed from here.
def checkLicenseHeaderPrecedes = { f, description, contentPattern, commentPattern, text, ratDocument ->
def contentMatcher = contentPattern.matcher(text);
if (contentMatcher.find()) {
// Offset of the first content match; a license comment must start before it.
def contentStartPos = contentMatcher.start();
def commentMatcher = commentPattern.matcher(text);
// Walk the comments in order, looking at the ones recognized as a license.
while (commentMatcher.find()) {
if (isLicense(commentMatcher, ratDocument)) {
if (commentMatcher.start() < contentStartPos) {
break; // This file is all good, so break loop: license header precedes 'description' definition
} else {
reportViolation(f, description+' declaration precedes license header');
// Main scan: visits every file in `sourceFiles`, reads it as UTF-8 and runs
// all checks on its text, reporting problems through reportViolation.
// NOTE(review): several closing braces (and possibly other statements) are
// not visible in this view; comments describe only the visible lines.
ProgressLogger progress = progressLoggerFactory.newOperation(this.class)
progress.start(this.name, this.name)
// One decoder instance is created here and passed to every file's reader.
def validatingDecoder = StandardCharsets.UTF_8.newDecoder()
sourceFiles.each { f ->
try {
progress.progress("Scanning ${f.name}")
logger.debug('Scanning source file: {}', f);
String text
try {
// Read the whole file through the decoder; a decoding failure is handled
// by the CharacterCodingException handler below.
text = f.withInputStream {
in -> new InputStreamReader(in, validatingDecoder).getText()
} catch (CharacterCodingException e) {
reportViolation(f, "incorrect UTF-8 encoding [${e}]")
return // we can't proceed for this file
// Generic patterns: report the first match of each forbidden pattern,
// together with its character offsets in the file.
invalidPatterns.each { pattern, name ->
def matcher = pattern.matcher(text);
if (matcher.find()) {
reportViolation(f, String.format(Locale.ROOT, '%s [start=%d, end=%d]', name, matcher.start(), matcher.end()));
// A license text must not be written as a '/** ... */' comment.
def javadocsMatcher = javadocsPattern.matcher(text);
def ratDocument = new FileDocument(f);
while (javadocsMatcher.find()) {
if (isLicense(javadocsMatcher, ratDocument)) {
// NOTE(review): the call below is cut off in this view - the argument for
// '%s' and the closing parentheses are not visible.
reportViolation(f, String.format(Locale.ENGLISH, 'javadoc-style license header [%s]',
if (f.name.endsWith('.java')) {
// make sure that SPI names of all tokenizers/charfilters/tokenfilters are documented
// Applies to non-abstract factories extending TokenizerFactory,
// CharFilterFactory or TokenFilterFactory, excluding those three base
// classes themselves and any file whose name contains Test, Mock or Fake.
if (!f.name.contains("Test") && !f.name.contains("Mock") && !f.name.contains("Fake") && !text.contains("abstract class") &&
!f.name.equals("TokenizerFactory.java") && !f.name.equals("CharFilterFactory.java") && !f.name.equals("TokenFilterFactory.java") &&
(f.name.contains("TokenizerFactory") && text.contains("extends TokenizerFactory") ||
f.name.contains("CharFilterFactory") && text.contains("extends CharFilterFactory") ||
f.name.contains("FilterFactory") && text.contains("extends TokenFilterFactory"))) {
if (!validSPINameJavadocTag.matcher(text).find()) {
reportViolation(f, 'invalid spi name documentation')
// The license comment must come before the package declaration.
checkLicenseHeaderPrecedes(f, 'package', packagePattern, javaCommentPattern, text, ratDocument);
// Java-only patterns, reported the same way as the generic ones.
invalidJavaOnlyPatterns.each { pattern, name ->
def matcher = pattern.matcher(text);
if (matcher.find()) {
reportViolation(f, String.format(Locale.ROOT, '%s [start=%d, end=%d]', name, matcher.start(), matcher.end()));
if (f.name.endsWith('.xml')) {
// The license comment must come before the first XML element.
checkLicenseHeaderPrecedes(f, '<tag>', xmlTagPattern, xmlCommentPattern, text, ratDocument);
} catch (e) {
// Wrap any unexpected failure so the error names the file being scanned.
throw new GradleException("Unhandled exception while validating patterns on file: " + f, e)
// Fail the task when at least one violation was collected. The message
// carries the number of violations followed by one violation per line.
// The format string has two specifiers (%d, %s), so both the count and the
// joined list must be passed; Locale.ROOT matches the other String.format
// calls in this task.
if (!violations.isEmpty()) {
  throw new GradleException(String.format(Locale.ROOT, 'Found %d source violation(s):\n %s',
    violations.size(),
    violations.join('\n ')))
}