lucene/gradle/validation/validate-log-calls.gradle

309 lines
12 KiB
Groovy

/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.tools.ant.BuildException
import static java.util.concurrent.TimeUnit.SECONDS
// Checks logging calls to keep from using patterns that might be expensive
// either in CPU or unnecessary object creation
configure(rootProject) {
allprojects {
task validateLogCalls(type: ValidateLogCallsTask) {
description "Checks that log calls are either validated or conform to efficient patterns."
group "verification"
doFirst {
if (project.hasProperty('srcDir')) {
srcDir = project.getProperty('srcDir')
} else { // Remove this later, make it optional
//nocommit
throw new BuildException(String.format(Locale.ENGLISH,
'''Until we get all the calls cleaned up, you MUST specify -PsrcDir=relative_path, e.g.
"-PsrcDir=solr/core/src/java/org/apache/solr/core". This task will recursively check all
"*.java" files under that directory'''))
}
checkPlus = Boolean.valueOf(propertyOrDefault('checkPlus', 'false'))
}
}
}
}
class ValidateLogCallsTask extends DefaultTask {
@Input
String srcDir = ""
@Input
boolean checkPlus
// nocommit, remove when you go to project-based checking.
Set<String> dirsToCheck = ["solr/core/src/java/org/apache/solr/analysis"
, "solr/core/src/java/org/apache/solr/api"
, "solr/core/src/java/org/apache/solr/client"
// , "solr/core/src/java/org/apache/solr/cloud" // 120
, "solr/core/src/java/org/apache/solr/cloud/api"
, "solr/core/src/java/org/apache/solr/cloud/autoscaling"
, "solr/core/src/java/org/apache/solr/cloud/cdcr"
, "solr/core/src/java/org/apache/solr/cloud/hdfs"
, "solr/core/src/java/org/apache/solr/cloud/overseer"
, "solr/core/src/java/org/apache/solr/cloud/rule"
, "solr/core/src/java/org/apache/solr/core"
, "solr/core/src/java/org/apache/solr/filestore"
, "solr/core/src/java/org/apache/solr/handler/admin"
, "solr/core/src/java/org/apache/solr/handler/component"
, "solr/core/src/java/org/apache/solr/handler/export"
, "solr/core/src/java/org/apache/solr/handler/loader"
, "solr/core/src/java/org/apache/solr/handler/tagger"
, "solr/core/src/java/org/apache/solr/highlight"
, "solr/core/src/java/org/apache/solr/index"
, "solr/core/src/java/org/apache/solr/internal"
, "solr/core/src/java/org/apache/solr/legacy"
, "solr/core/src/java/org/apache/solr/logging"
, "solr/core/src/java/org/apache/solr/metrics"
, "solr/core/src/java/org/apache/solr/packagemanager"
, "solr/core/src/java/org/apache/solr/parser"
, "solr/core/src/java/org/apache/solr/pkg"
, "solr/core/src/java/org/apache/solr/query"
, "solr/core/src/java/org/apache/solr/request"
, "solr/core/src/java/org/apache/solr/response"
, "solr/core/src/java/org/apache/solr/rest"
, "solr/core/src/java/org/apache/solr/schema"
, "solr/core/src/java/org/apache/solr/search"
, "solr/core/src/java/org/apache/solr/security"
, "solr/core/src/java/org/apache/solr/servlet"
, "solr/core/src/java/org/apache/solr/spelling"
, "solr/core/src/java/org/apache/solr/store"
, "solr/core/src/java/org/apache/solr/uninverting"
, "solr/core/src/java/org/apache/solr/update"
, "solr/core/src/java/org/apache/solr/util"
]
//nocommit REMOVE ME! Really! and the check for bare parens. Several times I've put in () when I meant {} and only
// caught it by chance. So for this mass edit, I created a check with a a lot of false positives. This is a list of
// them and we won't report them.
Map<String, List<Integer>> parenHack = [
"AddReplicaCmd.java" : [99]
, "Assign.java" : [329]
, "CommitTracker.java" : [135]
, "DeleteReplicaCmd.java" : [75]
, "DirectUpdateHandler2.java" : [838, 859]
, "ManagedIndexSchemaFactory.java": [284]
, "MoveReplicaCmd.java" : [75]
, "PeerSync.java" : [704]
, "RecordingJSONParser.java" : [76]
, "SliceMutator.java" : [61]
, "SolrDispatchFilter.java" : [150, 205, 242]
, "Suggester.java" : [147, 181]
, "UpdateLog.java" : [1976]
, "V2HttpCall.java" : [158]
]
def logLevels = ["log.trace", "log.debug", "log.info", "log.warn", "log.error", "log.fatal"]
def errsFound = 0;
def violations = new TreeSet();
def reportViolation(String msg) {
violations.add(System.lineSeparator + msg);
errsFound++;
}
// We have a log.something line, check for patterns we're not fond of.
def checkLogLine(File file, String line, int lineNumber, String previous) {
boolean violation = false
def bareParens = (line =~ /".*?"/).findAll()
bareParens.each({ part ->
if (part.contains("()")) {
List<Integer> hack = parenHack.get(file.name)
if (hack == null || hack.contains(lineNumber) == false) {
violation = true
}
}
})
// If the line has been explicitly checked, skip it.
if (line.replaceAll("\\s", "").toLowerCase().contains("//logok")) {
return
}
// Strip all of the comments, things in quotes and the like.
def level = ""
def lev = (line =~ "log\\.(.*?)\\(")
if (lev.find()) {
level = lev.group(1).toLowerCase().trim()
}
def stripped =
line.replaceFirst("//.*", " ") // remove comment to EOL. Again, fragile due to the possibility of embedded double slashes
.replaceFirst(/.*?\(/, " ") // Get rid of "log.info("
.replaceFirst(/\);/, " ") // get rid of the closing ");"
.replaceFirst("/\\*.*?\\*/", " ") // replace embedded comments "/*....*/"
.replaceAll(/".*?"/, " ") // remove anything between quotes. This is a bit fragile if there are embedded double quotes.
.replaceAll(/timeLeft\(.*?\)/, " ") // used all over tests, it's benign
.replaceAll(/TimeUnit\..*?\.convert\(.*?\)/, " ") // again, a pattern that's efficient
def m = stripped =~ "\\(.*?\\)"
def hasParens = m.find()
def hasPlus = stripped.contains("+")
// Check that previous line isn't an if statement for always-reported log levels. Arbitrary decision: we don't
// really care about checking for awkward constructions for WARN and above, so report a violation if the previous
// line contains an if for those levels.
boolean dontReportLevels = level.equals("fatal") || level.equals("error") || level.equals("warn")
// There's a convention to declare a member variable for whether a level is enabled and check that rather than
// isDebugEnabled so we need to check both.
String prevLine = previous.replaceAll("\\s+", "").toLowerCase()
boolean prevLineNotIf = ((prevLine.contains("if(log.is" + level + "enabled") == false
&& prevLine.contains("if(" + level + ")") == false))
if (dontReportLevels) {
// Only look (optionally) for plusses if surrounded by an if isLevelEnabled clause
// Otherwise, we're always OK with logging the message.
if (hasPlus && checkPlus) {
violation = true
}
} else { // less severe than warn, check for parens and plusses and correct
if (hasParens && prevLineNotIf) {
violation = true
}
if (hasPlus && checkPlus) {
violation = true
}
if (hasPlus && prevLineNotIf) {
violation = true
}
}
// Always report toString(). Note, this over-reports constructs like Arrays.toString(something), but just add //logOK.
if (line.contains("toString(") == true) {
violation = true
}
if (violation) {
reportViolation(String.format("Suspicious logging call File: '%s' line: '%d' log message: '%s' parent path: '%s'. Parameterize or surround with 'if (log.is*Enabled) {... stripped: '%s'"
, file.name
, lineNumber
, line
, file.getParentFile().getAbsolutePath()
, stripped
))
}
}
// Require all our logger definitions lower case "log", except a couple of special ones.
def checkLogName(File file, String line) {
// It's many times faster to do check this way than use a regex
if (line.contains("static ") && line.contains("getLogger") && line.contains(" log ") == false) {
switch (file.name) {
case "LoggerFactory.java": break
case "SolrCore.java": // Except for two know log files with a different name.
if (line.contains("requestLog") || line.contains("slowLog")) {
break
}
case "StartupLoggingUtils.java":
if (line.contains("getLoggerImplStr")) {
break;
}
default:
reportViolation("Change the logger name to lower-case 'log' in " + file.name + " " + line)
break;
}
}
}
def checkFile(File file) {
int state = 0 // 0 == not collecting a log line, 1 == collecting a log line, 2 == just collected the last.
int lineNumber = 0
StringBuilder sb = new StringBuilder();
// We have to assemble line-by-line to get the full log statement. We require that all loggers (except requestLog
// and slowLog) be exactly "log". That will be checked as well.
String prevLine = ""
file.eachLine { String line ->
lineNumber++
checkLogName(file, line)
switch (state) {
case 0:
logLevels.each {
if (line.contains(it)) {
if (line.contains(");")) {
state = 2
} else {
state = 1
}
sb.setLength(0)
sb.append(line)
}
}
break
case 1: // collecting
if (line.contains(");")) {
state = 2
}
sb.append(line)
break
default:
reportViolation("Bad state, should be 0-1 in the switch statement") // make people aware the code sucks
break
}
switch (state) { // It's just easier to do this here rather than read another line in the switch above.
case 0:
prevLine = line.toLowerCase();
break;
case 1:
break;
case 2:
checkLogLine(file, sb.toString(), lineNumber, prevLine)
state = 0
break;
default:
break;
}
}
}
@TaskAction
def checkLogLines() {
dirsToCheck.add(srcDir)
project.sourceSets.each { srcSet ->
srcSet.java.each { f ->
if (dirsToCheck.any {
f.getCanonicalPath().contains(it)
}) {
checkFile(f)
}
//nocommit move this to project based whenever
// if (srcDir != null && f.getCanonicalPath().contains(srcDir)) {
// checkFile(f)
// }
}
}
if (errsFound > 0) {
throw new BuildException(String.format(Locale.ENGLISH, 'Found %d violations in source files (%s).',
errsFound, violations.join(', ')));
}
}
}