/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.nio.charset.CharacterCodingException;
import java.nio.charset.CodingErrorAction;
import java.nio.charset.StandardCharsets;

import org.apache.rat.Defaults
import org.apache.rat.document.impl.FileDocument
import org.apache.rat.api.MetaData

import javax.inject.Inject;
import org.gradle.internal.logging.progress.ProgressLoggerFactory
import org.gradle.internal.logging.progress.ProgressLogger

// Apache RAT has to be on the build script classpath because the task class
// below references its classes directly.
buildscript {
  repositories {
    mavenCentral()
  }

  dependencies {
    classpath "org.apache.rat:apache-rat:${scriptDepVersions['apache-rat']}"
  }
}

// File name extensions (without the dot) that are included in the scan.
def extensions = [
  'adoc',
  'bat',
  'cmd',
  'css',
  'g4',
  'gradle',
  'groovy',
  'html',
  'java',
  'jflex',
  'jj',
  'js',
  'json',
  'md',
  'mdtext',
  'pl',
  'policy',
  'properties',
  'py',
  'sh',
  'template',
  'txt',
  'vm',
  'xml',
  'xsl',
]

// Create source validation task local to each project
allprojects {
  task validateSourcePatterns(type: ValidateSourcePatternsTask) { task ->
    group = 'Verification'
    description = 'Validate Source Patterns'

    sourceFiles = fileTree(projectDir) {
      extensions.each{
        include "**/*.${it}"
      }

      // Don't go into child projects (scanned separately).
      childProjects.keySet().each{
        exclude "${it}/**"
      }

      // default excludes.
      exclude '**/build/**'
      exclude '**/.idea/**'
      exclude '**/.gradle/**'

      if (project == rootProject) {
        // ourselves :-)
        exclude 'gradle/validation/validate-source-patterns.gradle'
      } else {
        // ignore txt files in source resources and tests.
        exclude 'src/**/*.txt'
      }
    }
  }

  // Add source validation to per-project checks as well.
  check.dependsOn validateSourcePatterns

  // Ensure validation runs prior to any compilation task. This also means
  // no executable code can leak out to other modules.
  tasks.withType(JavaCompile).configureEach {
    mustRunAfter validateSourcePatterns
  }
}

configure(project(':lucene:benchmark')) {
  project.tasks.withType(ValidateSourcePatternsTask) {
    sourceFiles.exclude 'data/**'
    sourceFiles.exclude 'work/**'

    // Known .txt offenders.
    sourceFiles.exclude '**/reuters.first20.lines.txt', '**/trecQRels.txt'
  }
}

/**
 * Scans every file in {@code sourceFiles} for forbidden text patterns, invalid UTF-8,
 * javadoc-style license headers and license headers that come after the first
 * package declaration (Java) or tag (XML). All violations are logged as errors and
 * collected; the task fails with a single {@code GradleException} listing them.
 */
@CacheableTask
class ValidateSourcePatternsTask extends DefaultTask {
  // Shared monitor used to serialize all calls into Apache RAT (see isLicense below).
  private static final Object ratBug = new Object()

  private ProgressLoggerFactory progressLoggerFactory

  @InputFiles
  @PathSensitive(PathSensitivity.RELATIVE)
  @IgnoreEmptyDirectories
  FileTree sourceFiles

  @Inject
  ValidateSourcePatternsTask(ProgressLoggerFactory progressLoggerFactory) {
    this.progressLoggerFactory = progressLoggerFactory
  }

  /**
   * Task action: validates all source files and throws a {@code GradleException}
   * if at least one violation was recorded, or if an unexpected error occurs while
   * processing a file.
   */
  @TaskAction
  public void check() {
    // Patterns applied to every scanned file, mapped to the violation name that is reported.
    def invalidPatterns = [
      (~$/@author\b/$) : '@author javadoc tag',
      (~$/(?i)\bno(n|)commit\b/$) : 'nocommit',
      (~$/\bTOOD:/$) : 'TOOD instead TODO',
      (~$/\t/$) : 'tabs instead spaces',
      (~$/[\u202A-\u202E\u2066-\u2069]/$) : 'misuse of RTL/LTR (https://trojansource.codes)',
      (~$/\Q/**\E((?:\s)|(?:\*))*\Q{@inheritDoc}\E((?:\s)|(?:\*))*\Q*/\E/$) : '{@inheritDoc} on its own is unnecessary',
      (~$/\$$(?:LastChanged)?Date\b/$) : 'svn keyword',
      (~$/\$$(?:(?:LastChanged)?Revision|Rev)\b/$) : 'svn keyword',
      (~$/\$$(?:LastChangedBy|Author)\b/$) : 'svn keyword',
      (~$/\$$(?:Head)?URL\b/$) : 'svn keyword',
      (~$/\$$Id\b/$) : 'svn keyword',
      (~$/\$$Header\b/$) : 'svn keyword',
      (~$/\$$Source\b/$) : 'svn keyword',
      (~$/[\u200B\uFEFF]/$) : 'UTF-8 byte order mark or other zero-width codepoints',
      (~$/import java\.lang\.\w+;/$) : 'java.lang import is unnecessary',
    ]

    // Python and others merrily use var declarations, this is a problem _only_ in Java
    // at least for 8x where we're forbidding var declarations
    //
    // NOTE(review): the wildcard-import pattern has no quantifier on (\.\w+), so it only
    // matches imports with exactly two package segments (e.g. java.util.*). Confirm
    // whether deeper packages are meant to be flagged before widening it.
    def invalidJavaOnlyPatterns = [
      (~$/\n\s*var\s+.*=.*<>.*/$) : 'Diamond operators should not be used with var',
      (~$/import\s+\w+(\.\w+)\.\*;/$) : 'Expand wildcard imports into explicit imports'
    ]

    // Sorted, de-duplicated messages of everything that was reported.
    def violations = new TreeSet();

    // Logs a violation for file f immediately and remembers it for the final summary.
    def reportViolation = { f, name ->
      String msg = String.format(Locale.ROOT, "%s: %s", f, name)
      logger.error(msg)
      violations.add(msg)
    }

    // Comment patterns capture the comment body in group 1; isLicense relies on that group.
    def javadocsPattern = ~$/(?sm)^\Q/**\E(.*?)\Q*/\E/$;
    def javaCommentPattern = ~$/(?sm)^\Q/*\E(.*?)\Q*/\E/$;
    // The XML comment delimiters and the capture group were missing here, which made the
    // pattern match the empty string and made group(1) fail for every XML file.
    def xmlCommentPattern = ~$/(?sm)\Q<!--\E(.*?)\Q-->\E/$;
    def lineSplitter = ~$/[\r\n]+/$;
    def packagePattern = ~$/(?m)^\s*package\s+org\.apache.*;/$;
    def xmlTagPattern = ~$/(?m)\s*<[a-zA-Z].*/$;
    def validSPINameJavadocTag = ~$/(?s)\s*\*\s*@lucene\.spi\s+\{@value #NAME\}/$;

    // Returns true if RAT's default matcher accepts at least one line of the comment
    // body (group 1 of the current match) as license text.
    def isLicense = { matcher, ratDocument ->
      // See LUCENE-10419 - rat is not thread safe.
      synchronized (ratBug) {
        def licenseMatcher = Defaults.createDefaultMatcher();
        licenseMatcher.reset()
        return lineSplitter.split(matcher.group(1)).any { licenseMatcher.match(ratDocument, it) }
      }
    }

    // Reports a violation when the first license comment in the file starts at or after
    // the first match of contentPattern. Files with no content match, or with no license
    // comment at all, are not reported by this check.
    def checkLicenseHeaderPrecedes = { f, description, contentPattern, commentPattern, text, ratDocument ->
      def contentMatcher = contentPattern.matcher(text);
      if (contentMatcher.find()) {
        def contentStartPos = contentMatcher.start();
        def commentMatcher = commentPattern.matcher(text);
        while (commentMatcher.find()) {
          if (!isLicense(commentMatcher, ratDocument)) {
            continue
          }
          if (commentMatcher.start() >= contentStartPos) {
            reportViolation(f, description+' declaration precedes license header');
          }
          // The first license comment decides the outcome. Scanning further would only
          // repeat the same message for every later license comment.
          break
        }
      }
    }

    ProgressLogger progress = progressLoggerFactory.newOperation(this.class)
    progress.start(this.name, this.name)

    try {
      // Strict decoder: malformed or unmappable input raises CharacterCodingException
      // instead of being silently replaced.
      def validatingDecoder = StandardCharsets.UTF_8.newDecoder()
          .onMalformedInput(CodingErrorAction.REPORT).onUnmappableCharacter(CodingErrorAction.REPORT)

      sourceFiles.each { f ->
        try {
          progress.progress("Scanning ${f.name}")
          logger.debug('Scanning source file: {}', f);

          String text
          try {
            validatingDecoder.reset()
            text = f.withInputStream { stream -> new InputStreamReader(stream, validatingDecoder).getText() }
          } catch (CharacterCodingException e) {
            reportViolation(f, "incorrect UTF-8 encoding [${e}]")
            return // we can't proceed for this file
          }

          // Only the first occurrence of each pattern is reported per file.
          invalidPatterns.each { pattern, name ->
            def matcher = pattern.matcher(text);
            if (matcher.find()) {
              reportViolation(f, String.format(Locale.ROOT, '%s [start=%d, end=%d]', name, matcher.start(), matcher.end()));
            }
          }

          def javadocsMatcher = javadocsPattern.matcher(text);
          def ratDocument = new FileDocument(f);
          while (javadocsMatcher.find()) {
            if (isLicense(javadocsMatcher, ratDocument)) {
              reportViolation(f, String.format(Locale.ENGLISH, 'javadoc-style license header [%s]',
                  ratDocument.getMetaData().value(MetaData.RAT_URL_LICENSE_FAMILY_NAME)));
            }
          }

          if (f.name.endsWith('.java')) {
            // make sure that SPI names of all tokenizers/charfilters/tokenfilters are documented
            if (!f.name.contains("Test") &&
                !f.name.contains("Mock") &&
                !f.name.contains("Fake") &&
                !text.contains("abstract class") &&
                !f.name.equals("TokenizerFactory.java") &&
                !f.name.equals("CharFilterFactory.java") &&
                !f.name.equals("TokenFilterFactory.java") &&
                (f.name.contains("TokenizerFactory") && text.contains("extends TokenizerFactory") ||
                    f.name.contains("CharFilterFactory") && text.contains("extends CharFilterFactory") ||
                    f.name.contains("FilterFactory") && text.contains("extends TokenFilterFactory"))) {
              if (!validSPINameJavadocTag.matcher(text).find()) {
                reportViolation(f, 'invalid spi name documentation')
              }
            }

            checkLicenseHeaderPrecedes(f, 'package', packagePattern, javaCommentPattern, text, ratDocument);

            invalidJavaOnlyPatterns.each { pattern, name ->
              def matcher = pattern.matcher(text);
              if (matcher.find()) {
                reportViolation(f, String.format(Locale.ROOT, '%s [start=%d, end=%d]', name, matcher.start(), matcher.end()));
              }
            }
          }

          if (f.name.endsWith('.xml')) {
            // A non-empty description keeps the reported message readable.
            checkLicenseHeaderPrecedes(f, '<tag>', xmlTagPattern, xmlCommentPattern, text, ratDocument);
          }
        } catch (e) {
          e.printStackTrace()
          throw new GradleException("Unhandled exception while validating patterns on file: " + f, e)
        }
      }
    } finally {
      // Always close the progress operation, even when a file fails unexpectedly.
      progress.completed()
    }

    if (!violations.isEmpty()) {
      throw new GradleException(String.format(Locale.ENGLISH, 'Found %d source violation(s):\n  %s',
          violations.size(),
          violations.join('\n  ')))
    }
  }
}