From 1ae6b2a6b9df69e26a94a295bd4b42bc0575b202 Mon Sep 17 00:00:00 2001 From: Uwe Schindler Date: Wed, 3 Nov 2021 17:19:24 +0100 Subject: [PATCH] LUCENE-10218: Extend validateSourcePatterns task to scan for LTR/RTL unicode to catch "Trojan Source" source code attacks (#425) Co-authored-by: Dawid Weiss --- .../validate-source-patterns.gradle | 75 ++++++++----------- 1 file changed, 31 insertions(+), 44 deletions(-) diff --git a/gradle/validation/validate-source-patterns.gradle b/gradle/validation/validate-source-patterns.gradle index 5b8519091d5..cc01d37e981 100644 --- a/gradle/validation/validate-source-patterns.gradle +++ b/gradle/validation/validate-source-patterns.gradle @@ -61,45 +61,8 @@ def extensions = [ 'xsl', ] -// Create source validation task local for each project's files. -subprojects { - task validateSourcePatterns(type: ValidateSourcePatternsTask) { task -> - group = 'Verification' - description = 'Validate Source Patterns' - - sourceFiles = fileTree(projectDir) { - extensions.each{ - include "**/*.${it}" - } - - // Don't go into child projects (scanned separately). - childProjects.keySet().each{ - exclude "${it}/**" - } - - // default excludes. - exclude 'build/**' - exclude '**/.idea/**' - - // ignore txt files in source resources and tests. - exclude 'src/**/*.txt' - } - } - - // Add source validation to per-project checks as well. - check.dependsOn validateSourcePatterns -} - -configure(project(':lucene:benchmark')) { - project.tasks.withType(ValidateSourcePatternsTask) { - sourceFiles.exclude 'data/**' - - // Known .txt offenders. - sourceFiles.exclude '**/reuters.first20.lines.txt', '**/trecQRels.txt' - } -} - -configure(rootProject) { +// Create source validation task local to each project +allprojects { task validateSourcePatterns(type: ValidateSourcePatternsTask) { task -> group = 'Verification' description = 'Validate Source Patterns' @@ -116,19 +79,42 @@ configure(rootProject) { // default excludes. 
exclude '**/build/**' + exclude '**/.idea/**' - // ourselves :-) - exclude 'gradle/validation/validate-source-patterns.gradle' + if (project == rootProject) { + // ourselves :-) + exclude 'gradle/validation/validate-source-patterns.gradle' - // gradle and idea folders. - exclude '.gradle/**' - exclude '.idea/**' + // gradle and idea folders. + exclude '.gradle/**' + exclude '.idea/**' + } else { + // ignore txt files in source resources and tests. + exclude 'src/**/*.txt' + } } } + // Add source validation to per-project checks as well. check.dependsOn validateSourcePatterns + + // Ensure validation runs prior to any compilation task. This also means + // no executable code can leak out to other modules. + tasks.withType(JavaCompile).configureEach { + mustRunAfter validateSourcePatterns + } } +configure(project(':lucene:benchmark')) { + project.tasks.withType(ValidateSourcePatternsTask) { + sourceFiles.exclude 'data/**' + + // Known .txt offenders. + sourceFiles.exclude '**/reuters.first20.lines.txt', '**/trecQRels.txt' + } +} + + @CacheableTask class ValidateSourcePatternsTask extends DefaultTask { private ProgressLoggerFactory progressLoggerFactory @@ -150,6 +136,7 @@ class ValidateSourcePatternsTask extends DefaultTask { (~$/(?i)\bno(n|)commit\b/$) : 'nocommit', (~$/\bTOOD:/$) : 'TOOD instead TODO', (~$/\t/$) : 'tabs instead spaces', + (~$/[\u202A-\u202E\u2066-\u2069]/$) : 'misuse of RTL/LTR (https://trojansource.codes)', (~$/\Q/**\E((?:\s)|(?:\*))*\Q{@inheritDoc}\E((?:\s)|(?:\*))*\Q*/\E/$) : '{@inheritDoc} on its own is unnecessary', (~$/\$$(?:LastChanged)?Date\b/$) : 'svn keyword', (~$/\$$(?:(?:LastChanged)?Revision|Rev)\b/$) : 'svn keyword',