diff --git a/build.xml b/build.xml index 96c4526a26c..cc1aff699f6 100644 --- a/build.xml +++ b/build.xml @@ -138,7 +138,7 @@ 'java', 'jflex', 'py', 'pl', 'g4', 'jj', 'html', 'js', 'css', 'xml', 'xsl', 'vm', 'sh', 'cmd', 'bat', 'policy', 'properties', 'mdtext', - 'template', + 'template', 'adoc', ]; def invalidPatterns = [ (~$/@author\b/$) : '@author javadoc tag', @@ -170,10 +170,15 @@ def javaCommentPattern = ~$/(?sm)^\Q/*\E(.*?)\Q*/\E/$; def xmlCommentPattern = ~$/(?sm)\Q\E/$; def lineSplitter = ~$/[\r\n]+/$; + def singleLineSplitter = ~$/\n\r?/$; def licenseMatcher = Defaults.createDefaultMatcher(); def validLoggerPattern = ~$/(?s)\b(private\s|static\s|final\s){3}+\s*Logger\s+\p{javaJavaIdentifierStart}+\s+=\s+\QLoggerFactory.getLogger(MethodHandles.lookup().lookupClass());\E/$; def packagePattern = ~$/(?m)^\s*package\s+org\.apache.*;/$; def xmlTagPattern = ~$/(?m)\s*<[a-zA-Z].*/$; + def sourceHeaderPattern = ~$/\[source\b.*/$; + def blockBoundaryPattern = ~$/----\s*/$; + def blockTitlePattern = ~$/\..*/$; + def unescapedSymbolPattern = ~$/(?<=[^\\]|^)([-=]>|<[-=])/$; // SOLR-10883 def isLicense = { matcher, ratDocument -> licenseMatcher.reset(); @@ -197,6 +202,33 @@ } } + def checkForUnescapedSymbolSubstitutions = { f, text -> + def inCodeBlock = false; + def underSourceHeader = false; + def lineNumber = 0; + singleLineSplitter.split(text).each { + ++lineNumber; + if (underSourceHeader) { // This line is either a single source line, or the boundary of a code block + inCodeBlock = blockBoundaryPattern.matcher(it).matches(); + if ( ! blockTitlePattern.matcher(it).matches()) { // Keep underSourceHeader=true + underSourceHeader = false; + } + } else { + if (inCodeBlock) { + inCodeBlock = ! blockBoundaryPattern.matcher(it).matches(); + } else { + underSourceHeader = sourceHeaderPattern.matcher(it).matches(); + if ( ! underSourceHeader) { + def unescapedSymbolMatcher = unescapedSymbolPattern.matcher(it); + if (unescapedSymbolMatcher.find()) { + reportViolation(f, 'Unescaped symbol "' + unescapedSymbolMatcher.group(1) + '" on line #' + lineNumber); + } + } + } + } + } + } + ant.fileScanner{ fileset(dir: baseDir){ extensions.each{ @@ -244,6 +276,9 @@ if (f.toString().endsWith('.xml') || f.toString().endsWith('.xml.template')) { checkLicenseHeaderPrecedes(f, '', xmlTagPattern, xmlCommentPattern, text, ratDocument); } + if (f.toString().endsWith('.adoc')) { + checkForUnescapedSymbolSubstitutions(f, text); + } }; if (found) { diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index 64bd4b5810b..aff72dbcd27 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -472,6 +472,10 @@ Other Changes * SOLR-10834: Fixed tests and test configs to stop using numeric uniqueKey fields (hossman) +* SOLR-10883: Ref guide: Escape replacement substitutions, e.g. => to right arrow, so that they are + rendered visibly in the PDF. Also add .adoc file checks to the top-level validate target, including + for the invisible substitutions PDF problem. (Steve Rowe) + ================== 6.6.1 ================== Bug Fixes diff --git a/solr/solr-ref-guide/src/charfilterfactories.adoc b/solr/solr-ref-guide/src/charfilterfactories.adoc index 5b64c875e41..6010a319f57 100644 --- a/solr/solr-ref-guide/src/charfilterfactories.adoc +++ b/solr/solr-ref-guide/src/charfilterfactories.adoc @@ -43,8 +43,8 @@ Example: Mapping file syntax: * Comment lines beginning with a hash mark (`#`), as well as blank lines, are ignored. -* Each non-comment, non-blank line consists of a mapping of the form: `"source" => "target"` -** Double-quoted source string, optional whitespace, an arrow (`=>`), optional whitespace, double-quoted target string. +* Each non-comment, non-blank line consists of a mapping of the form: `"source" \=> "target"` +** Double-quoted source string, optional whitespace, an arrow (`\=>`), optional whitespace, double-quoted target string. * Trailing comments on mapping lines are not allowed. * The source string must contain at least one character, but the target string may be empty. * The following character escape sequences are recognized within source and target strings: @@ -54,14 +54,14 @@ Mapping file syntax: [cols="20,30,20,30",options="header"] |=== |Escape Sequence |Resulting Character (http://www.ecma-international.org/publications/standards/Ecma-048.htm[ECMA-48] alias) |Unicode Character |Example Mapping Line -|`\\` |`\` |U+005C |`"\\" => "/"` -|`\"` |`"` |U+0022 |`"\"and\"" => "'and'"` -|`\b` |backspace (BS) |U+0008 |`"\b" => " "` -|`\t` |tab (HT) |U+0009 |`"\t" => ","` -|`\n` |newline (LF) |U+000A |`"\n" => "
"` -|`\f` |form feed (FF) |U+000C |`"\f" => "\n"` -|`\r` |carriage return (CR) |U+000D |`"\r" => "/carriage-return/"` -|`\uXXXX` |Unicode char referenced by the 4 hex digits |U+XXXX |`"\uFEFF" => ""` +|`\\` |`\` |U+005C |`"\\" \=> "/"` +|`\"` |`"` |U+0022 |`"\"and\"" \=> "'and'"` +|`\b` |backspace (BS) |U+0008 |`"\b" \=> " "` +|`\t` |tab (HT) |U+0009 |`"\t" \=> ","` +|`\n` |newline (LF) |U+000A |`"\n" \=> "
"` +|`\f` |form feed (FF) |U+000C |`"\f" \=> "\n"` +|`\r` |carriage return (CR) |U+000D |`"\r" \=> "/carriage-return/"` +|`\uXXXX` |Unicode char referenced by the 4 hex digits |U+XXXX |`"\uFEFF" \=> ""` |=== ** A backslash followed by any other character is interpreted as if the character were present without the backslash. @@ -96,8 +96,8 @@ The table below presents examples of HTML stripping. |=== |Input |Output |`my link` |my link -|`
hello` |hello -|`hello'); -->` |hello +|`
hello