LUCENE-10218: Extend validateSourcePatterns task to scan for LTR/RTL unicode to catch "Trojan Source" source code attacks (#425)

Co-authored-by: Dawid Weiss <dawid.weiss@carrotsearch.com>
This commit is contained in:
Uwe Schindler 2021-11-03 17:19:24 +01:00 committed by GitHub
parent f9be01d5cc
commit 1ae6b2a6b9
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed file with 31 additions and 44 deletions

View File

@ -61,45 +61,8 @@ def extensions = [
'xsl', 'xsl',
] ]
// Create source validation task local for each project's files. // Create source validation task local to each project
subprojects { allprojects {
task validateSourcePatterns(type: ValidateSourcePatternsTask) { task ->
group = 'Verification'
description = 'Validate Source Patterns'
sourceFiles = fileTree(projectDir) {
extensions.each{
include "**/*.${it}"
}
// Don't go into child projects (scanned separately).
childProjects.keySet().each{
exclude "${it}/**"
}
// default excludes.
exclude 'build/**'
exclude '**/.idea/**'
// ignore txt files in source resources and tests.
exclude 'src/**/*.txt'
}
}
// Add source validation to per-project checks as well.
check.dependsOn validateSourcePatterns
}
configure(project(':lucene:benchmark')) {
project.tasks.withType(ValidateSourcePatternsTask) {
sourceFiles.exclude 'data/**'
// Known .txt offenders.
sourceFiles.exclude '**/reuters.first20.lines.txt', '**/trecQRels.txt'
}
}
configure(rootProject) {
task validateSourcePatterns(type: ValidateSourcePatternsTask) { task -> task validateSourcePatterns(type: ValidateSourcePatternsTask) { task ->
group = 'Verification' group = 'Verification'
description = 'Validate Source Patterns' description = 'Validate Source Patterns'
@ -116,19 +79,42 @@ configure(rootProject) {
// default excludes. // default excludes.
exclude '**/build/**' exclude '**/build/**'
exclude '**/.idea/**'
if (project == rootProject) {
// ourselves :-) // ourselves :-)
exclude 'gradle/validation/validate-source-patterns.gradle' exclude 'gradle/validation/validate-source-patterns.gradle'
// gradle and idea folders. // gradle and idea folders.
exclude '.gradle/**' exclude '.gradle/**'
exclude '.idea/**' exclude '.idea/**'
} else {
// ignore txt files in source resources and tests.
exclude 'src/**/*.txt'
}
} }
} }
// Add source validation to per-project checks as well.
check.dependsOn validateSourcePatterns check.dependsOn validateSourcePatterns
// Ensure validation runs prior to any compilation task. This also means
// no executable code can leak out to other modules.
tasks.withType(JavaCompile).configureEach {
mustRunAfter validateSourcePatterns
}
} }
configure(project(':lucene:benchmark')) {
project.tasks.withType(ValidateSourcePatternsTask) {
sourceFiles.exclude 'data/**'
// Known .txt offenders.
sourceFiles.exclude '**/reuters.first20.lines.txt', '**/trecQRels.txt'
}
}
@CacheableTask @CacheableTask
class ValidateSourcePatternsTask extends DefaultTask { class ValidateSourcePatternsTask extends DefaultTask {
private ProgressLoggerFactory progressLoggerFactory private ProgressLoggerFactory progressLoggerFactory
@ -150,6 +136,7 @@ class ValidateSourcePatternsTask extends DefaultTask {
(~$/(?i)\bno(n|)commit\b/$) : 'nocommit', (~$/(?i)\bno(n|)commit\b/$) : 'nocommit',
(~$/\bTOOD:/$) : 'TOOD instead TODO', (~$/\bTOOD:/$) : 'TOOD instead TODO',
(~$/\t/$) : 'tabs instead spaces', (~$/\t/$) : 'tabs instead spaces',
(~$/[\u202A-\u202E\u2066-\u2069]/$) : 'misuse of RTL/LTR (https://trojansource.codes)',
(~$/\Q/**\E((?:\s)|(?:\*))*\Q{@inheritDoc}\E((?:\s)|(?:\*))*\Q*/\E/$) : '{@inheritDoc} on its own is unnecessary', (~$/\Q/**\E((?:\s)|(?:\*))*\Q{@inheritDoc}\E((?:\s)|(?:\*))*\Q*/\E/$) : '{@inheritDoc} on its own is unnecessary',
(~$/\$$(?:LastChanged)?Date\b/$) : 'svn keyword', (~$/\$$(?:LastChanged)?Date\b/$) : 'svn keyword',
(~$/\$$(?:(?:LastChanged)?Revision|Rev)\b/$) : 'svn keyword', (~$/\$$(?:(?:LastChanged)?Revision|Rev)\b/$) : 'svn keyword',