lucene/gradle/validation/validate-log-calls.gradle

246 lines
9.4 KiB
Groovy

/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.tools.ant.BuildException
// Checks logging calls to keep from using patterns that might be expensive
// either in CPU or unnecessary object creation. Some of the checks are a little
// over-zealous, but it's worth a bit of aggravation to catch the egregious
// mistakes before they accrue.
// This all started with Java Flight Recorder showing a massive number of
// objects allocated and garbage collected. Tracing a bunch of them down showed
// that we have a number of calls like:
// log.debug("some string" + very_expensive_object_to_make_a_string).
// which _always_ constructed the very expensive ojbect even if the log level
// was INFO or more severe. Rather than leave lots of confusing formats laying
// about or expecting people to remember the rules, we opted to make this
// check part of Gradle precommit. It will be included by default in Solr
// 9.0.
// See the associated help task "gradlew helpValidateLogCalls"
configure(subprojects.findAll { it.path.startsWith(':solr') }) {
plugins.withType(JavaPlugin) {
task validateLogCalls(type: ValidateLogCallsTask) {
description "Checks that log calls are either validated or conform to efficient patterns."
group "verification"
sourceFiles = files(sourceSets*.java)
}
}
}
class ValidateLogCallsTask extends DefaultTask {
def logLevels = ["log.trace", "log.debug", "log.info", "log.warn", "log.error", "log.fatal"]
def errsFound = 0;
def violations = new ArrayList();
def reportViolation(String msg) {
violations.add(System.lineSeparator + msg);
errsFound++;
}
// We have a log.something line, check for patterns we're not fond of.
def checkLine(File file, String line, int lineNumber, String previous) {
boolean violation = false
// If the line has been explicitly OK'd, then it's OK!
if (line.replaceAll("\\s", "").toLowerCase().contains("//logok")) {
return
}
// Strip all of the comments, things in quotes and the like.
def stripped =
line.replaceFirst("//.*", " ") // remove comment to EOL. May be fragile due to the possibility of embedded double slashes
.replaceFirst(/.*?\(/, " ") // Get rid of "log.whatever("
.replaceFirst(/\);/, " ") // get rid of the closing ");"
.replaceFirst("/\\*.*?\\*/", " ") // replace embedded comments "/*....*/"
.replaceAll(/".*?"/, '""') // remove anything between quotes. This is a bit fragile if there are embedded double quotes.
.replaceAll(/timeLeft\(.*?\)/, " ") // used all over tests, it's benign
.replaceAll(/TimeUnit\..*?\.convert\(.*?\)/, " ") // again, a pattern that's efficient
.replaceAll("\\s", "")
// The compiler will pre-assemble patterns like 'log.info("string const1 {}" + " string const2 {}", obj1, obj2)'
// to log.info("string const1 {} string const2 {}", obj1, obj2)', so don't worry about any plus preceded and
// followed by double quotes, otherwise flag it.
def hasPlus = false
for (int idx = 0; idx < stripped.length(); ++idx) {
if (stripped.charAt(idx) == '+') {
if (idx == 0 || idx == stripped.length() - 1
|| stripped.charAt(idx - 1) != '"' || stripped.charAt(idx + 1) != '"') {
hasPlus = true
break
}
}
}
//TODO remove me
String cause = ""
// Get the level of this log message.
// We don't really care about checking for method calls for WARN and more severe, the presumption
// is that they _should_ always be reported.
def level = ""
def lev = (line =~ "log\\.(.*?)\\(")
if (lev.find()) {
level = lev.group(1).toLowerCase().trim()
}
if (logLevels.contains("log." + level) == false) {
reportViolation(String.format("Found an unexpected log level: %s, file: %s:%d"
, level, file.getAbsolutePath(), lineNumber))
}
boolean persnicketyLevel = !(level.equals("fatal") || level.equals("error") || level.equals("warn"))
// Check that previous line isn't an if statement for always-reported log levels.
// There's a convention to declare a member variable for whether a level is enabled and check that rather than
// isDebugEnabled so we need to check both.
String prevLine = previous.replaceAll("\\s+", "").toLowerCase()
boolean prevLineNotIf = ((prevLine.contains("if(log.is" + level + "enabled") == false
&& prevLine.contains("if(" + level + ")") == false))
if (hasPlus) {
cause = "hasPlus: " + hasPlus
violation = true
}
if (violation == false) {
def m = stripped =~ "\\(.*?\\)"
def hasParens = m.find()
if (hasParens && prevLineNotIf && persnicketyLevel) {
cause = "hasParens " + hasParens + " prevLineNotIf " + prevLineNotIf + " pernicketyLevel " + persnicketyLevel
violation = true
}
}
// Always report toString(). Note, this over-reports some constructs
// but just add //logOK if it's really OK.
if (violation == false) {
if (line.contains("toString(") == true && prevLineNotIf) {
cause = "Line contains toString"
violation = true
}
}
if (violation) {
reportViolation(String.format("cause: '%s' Suspicious logging call, Parameterize and possibly surround with 'if (log.is*Enabled) {..}'. Help at: 'gradlew helpValidateLogCalls' %s %s:%d"
, cause , System.lineSeparator, file.getAbsolutePath(), lineNumber))
}
return
}
// Require all our logger definitions lower case "log", except a couple of special ones.
def checkLogName(File file, String line) {
// It's many times faster to do check this way than use a regex
if (line.contains("static ") && line.contains("getLogger") && line.contains(" log ") == false) {
String name = file.name
if (name.equals("LoggerFactory.java")) {
return
}
if (name.equals("SolrCore.java") && (line.contains("requestLog") || line.contains("slowLog"))) {
return
}
if (name.equals("StartupLoggingUtils.java") && line.contains("getLoggerImplStr")) {
return
}
// Apparently the Hadoop code expectes upper-case LOG, so...
if ((name.equals("HttpServer2.java") || name.equals("BlockPoolSlice.java") || name.equals("FileUtil.java"))
&& line.contains(" LOG ")) {
return
}
reportViolation("Change the logger name to lower-case 'log' in " + file.name + " " + line + " project" + project)
}
}
def checkFile(File file) {
int state = 0
int lineNumber = 0
StringBuilder sb = new StringBuilder();
// We have to assemble line-by-line to get the full log statement. We require that all loggers (except requestLog
// and slowLog) be exactly "log". That will be checked as well.
String prevLine = ""
file.eachLine { String line ->
lineNumber++
checkLogName(file, line)
switch (state) {
case 0: // Not collecting a logging line.
logLevels.each {
if (line.contains(it)) {
if (line.contains(");")) {
state = 2
} else {
state = 1
}
sb.setLength(0)
sb.append(line)
}
}
break
case 1: // collecting a log line
if (line.replaceFirst("//.*", " ")
.replaceFirst("/\\*.*?\\*/", " ") // replace embedded comments "/*....*/"
.replaceAll(/".*?"/, '""') // remove anything between quotes. This is a bit fragile if there are embedded double quotes.
.trim().endsWith(");")) {
state = 2
}
sb.append(line)
break
default:
reportViolation("Bad state, should be 0-1 in the switch statement") // make people aware the code sucks
break
}
// It's just easier to do this here rather than read another line in the switch above.
if (state == 0) {
// Not collecting a log line
prevLine = line.toLowerCase();
} else if (state == 1) {
// collecting a logging line.
} else if (state == 2) {
// We've collected the complete log line.
checkLine(file, sb.toString(), lineNumber, prevLine)
state = 0
} else {
assert false
}
}
}
@InputFiles
FileCollection sourceFiles
ValidateLogCallsTask() {
// No explicit outputs (outputs always up to date).
outputs.upToDateWhen { true }
}
@TaskAction
def checkLogLines() {
sourceFiles.each { checkFile(it) }
logger.warn("Checked: ${sourceFiles.files.size()}")
if (errsFound > 0) {
throw new BuildException(String.format(Locale.ENGLISH, 'Found %d violations in source files (%s).',
errsFound, violations.join(', ')));
}
}
}