SOLR-12134: hook ref-guide 'bare-bones-html' validation into top level documentation target using relative javadoc URL prefixes that are now validated to point to real files

This commit is contained in:
Chris Hostetter 2018-04-03 16:12:34 -07:00
parent b87cbc2f75
commit c0709f113d
5 changed files with 114 additions and 45 deletions

View File

@ -188,7 +188,12 @@
<target name="javadocs" description="Calls javadocs-all, javadocs-solrj, and javadocs-test-framework"
depends="define-lucene-javadoc-url,javadocs-solr-core,javadocs-solrj,javadocs-test-framework,javadocs-contrib"/>
<target name="documentation" description="Generate all documentation"
depends="javadocs,changes-to-html,process-webpages"/>
depends="javadocs,changes-to-html,process-webpages">
<ant dir="solr-ref-guide" target="bare-bones-html-validation" inheritall="false">
<propertyset refid="uptodate.and.compiled.properties"/>
<property name="local.javadocs" value="true" />
</ant>
</target>
<target name="compile-core" depends="compile-solr-core" unless="solr.core.compiled"/>
<target name="documentation-online" description="Generate a link to the online documentation"

View File

@ -83,8 +83,24 @@
</filterchain>
</loadresource>
<property name="solr-javadocs" value="https://lucene.apache.org/solr/${solr-docs-version-path}/" />
<property name="lucene-javadocs" value="https://lucene.apache.org/core/${solr-docs-version-path}/" />
<!-- where we link to javadocs from the html guide, and if we validate them, is all dependent
on the 'local.javadocs' sysprop -->
<condition property="check-all-relative-links" value="-check-all-relative-links" else="">
<isset property="local.javadocs" />
</condition>
<condition property="html-solr-javadocs"
value="link:../../docs/"
else="https://lucene.apache.org/solr/${solr-docs-version-path}/">
<isset property="local.javadocs" />
</condition>
<condition property="html-lucene-javadocs"
value="link:../../../../lucene/build/docs/"
else="https://lucene.apache.org/core/${solr-docs-version-path}/">
<isset property="local.javadocs" />
</condition>
<!-- for the PDF guide, we always use absolute javadoc urls -->
<property name="pdf-solr-javadocs" value="https://lucene.apache.org/solr/${solr-docs-version-path}/" />
<property name="pdf-lucene-javadocs" value="https://lucene.apache.org/core/${solr-docs-version-path}/" />
<property name="build.content.dir" location="${build.dir}/content" />
<property name="main-page" value="index" />
@ -175,6 +191,8 @@
<attribute name="sourceDocumentName"/>
<attribute name="outputDirectory"/>
<attribute name="backend"/>
<!-- javadoc link prefixes default to the (absolute) PDF urls; callers such as the bare-bones HTML build may override them -->
<attribute name="solr-javadocs" default="${pdf-solr-javadocs}" />
<attribute name="lucene-javadocs" default="${pdf-lucene-javadocs}" />
<attribute name="headerFooter" default="true" />
<sequential>
<!-- NOTE: we have our own variant on the asciidoctor-ant task, so that sourceDocumentName=""
@ -212,8 +230,8 @@
<attribute key="solr-guide-draft-status" value="${solr-guide-draft-status}" />
<attribute key="solr-guide-version" value="${solr-guide-version}" />
<attribute key="solr-docs-version" value="${solr-docs-version}" />
<attribute key="solr-javadocs" value="${solr-javadocs}" />
<attribute key="lucene-javadocs" value="${lucene-javadocs}" />
<attribute key="solr-javadocs" value="@{solr-javadocs}" />
<attribute key="lucene-javadocs" value="@{lucene-javadocs}" />
<attribute key="build-date" value="${DSTAMP}" />
<attribute key="build-year" value="${current.year}" />
<attribute key="ivy-commons-codec-version" value="${ivyversions./commons-codec/commons-codec}" />
@ -269,6 +287,7 @@
fork="true">
<classpath refid="tools-run-classpath"/>
<arg value="${build.dir}/html-site"/>
<arg value="${check-all-relative-links}" />
</java>
<echo>Ready to browse site: ${build.dir}/html-site/${main-page}.html</echo>
</target>
@ -296,6 +315,8 @@
outputDirectory="${build.dir}/bare-bones-html"
headerFooter="false"
backend="html5"
solr-javadocs="${html-solr-javadocs}"
lucene-javadocs="${html-lucene-javadocs}"
/>
<java classname="CheckLinksAndAnchors"
@ -303,7 +324,8 @@
fork="true">
<classpath refid="tools-run-classpath"/>
<arg value="${build.dir}/bare-bones-html"/>
<arg value="true" />
<arg value="-bare-bones" />
<arg value="${check-all-relative-links}" />
</java>
<echo>Validated Links &amp; Anchors via: ${build.dir}/bare-bones-html/</echo>
</target>

View File

@ -74,8 +74,8 @@ solr-attributes: &solr-attributes-ref
solr-guide-version: "${solr-guide-version}"
solr-guide-version-path: "${solr-guide-version-path}"
solr-docs-version: "${solr-docs-version}"
solr-javadocs: "${solr-javadocs}"
lucene-javadocs: "${lucene-javadocs}"
solr-javadocs: "${html-solr-javadocs}"
lucene-javadocs: "${html-lucene-javadocs}"
build-date: "${DSTAMP}"
build-year: "${current.year}"
ivy-commons-codec-version: "${ivyversions./commons-codec/commons-codec}"

View File

@ -18,7 +18,7 @@ Finally, we'll introduce <<Spatial Queries,spatial search>> and show you how to
To follow along with this tutorial, you will need...
// TODO possibly remove this system requirements or only replace the link
. To meet the {solr-javadocs}/solr/api/SYSTEM_REQUIREMENTS.html[system requirements]
. To meet the {solr-javadocs}/SYSTEM_REQUIREMENTS.html[system requirements]
. An Apache Solr release http://lucene.apache.org/solr/downloads.html[download]. This tutorial is designed for Apache Solr {solr-docs-version}.
For best results, please run the browser showing this tutorial and the Solr server on the same machine so tutorial links will correctly point to your Solr server.

View File

@ -28,7 +28,7 @@ import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashSet;
import java.util.Map;
import java.util.Set;
import java.util.regex.Matcher;
@ -48,6 +48,10 @@ import org.jsoup.select.NodeVisitor;
* Check various things regarding anchors, links &amp; general doc structure in the generated HTML site.
*
* <p>
* Usage: <code>java CheckLinksAndAnchors some-html-dir-name/ [-check-all-relative-links] [-bare-bones]</code>
* </p>
* <p>
* Problems this tool checks for...
* </p>
*
@ -55,8 +59,8 @@ import org.jsoup.select.NodeVisitor;
* <li>
* Asciidoctor doesn't do a good job of rectifying situations where multiple documents are included in one
* massive (PDF) document may have identical anchors (either explicitly defined, or implicitly defined because of
* section headings). Asciidoctor also doesn't support linking directly to another (included) document by name,
* unless there is an explicit '#fragment' used in the link.
* section headings). Asciidoctor also doesn't support linking directly to another (included) asciidoc
* document by name, unless there is an explicit '#fragment' used in the link.
* </li>
* <li>
* Any "relative" link should point to a file that actually exists.
@ -75,28 +79,42 @@ import org.jsoup.select.NodeVisitor;
* problems in the generated PDF.
* </p>
* <p>
* This tool supports 2 modes, depending on whether you want to run it against the HTML generated by Jekyll, or
* the "bare bones" HTML generated directly by asciidoctor...
* This tool supports 2 command line options:
* </p>
* <ul>
* <li><b>-check-all-relative-links</b><br />
* <p>By default, only relative links to files in the same directory (ie: not starting with
* <code>"../"</code>) are checked for existence. This means that we can do a "quick" validation of
* links to other ref-guide files, but ignore relative links to things outside of the ref-guide --
* such as javadocs that we may not currently have built. If this option is specified then we
* <em>also</em> check relative links where the path starts with <code>"../"</code>
* </p>
* </li>
* <li><b>-bare-bones</b><br/>
* <p>By default, this tool assumes it is analyzing Jekyll generated files. If this option is specified,
* then it instead assumes it's checking "bare bones" HTML files...
* </p>
* <ul>
* <li>Jekyll Mode:
* <ul>
* <li><code>CheckLinksAndAnchors html-dir-name/ [false]</code></li>
* <li>Requires all html pages have a "main-content" div; ignores all DOM Nodes that are
* <em>not</em> descendants of this div (to exclude redundant template based header, footer, &amp; sidebar links)
* <em>not</em> descendants of this div (to exclude redundant template based header, footer,
* &amp; sidebar links)
* </li>
* <li>Expects that the <code>&lt;body/&gt;</code> tag will have an <code>id</code> matching the page shortname.</li>
* <li>Expects that the <code>&lt;body/&gt;</code> tag will have an <code>id</code> matching
* the page shortname.</li>
* </ul>
* </li>
* <li>Bare Bones Mode:
* <ul>
* <li><code>CheckLinksAndAnchors html-dir-name/ true</code></li>
* <li>Checks all links &amp; anchors in the page.</li>
* <li>"Fakes" the existence of a <code>&lt;body id="..."&gt;</code> tag containing the page shortname.</li>
* <li>"Fakes" the existence of a <code>&lt;body id="..."&gt;</code> tag containing the
* page shortname.</li>
* </ul>
* </li>
* </ul>
* </li>
* </ul>
*
*
* TODO: build a list of all known external links so that some other tool could (optionally) ping them all for 200 status?
*
@ -114,22 +132,35 @@ public class CheckLinksAndAnchors { // TODO: rename this class now that it does
public static void main(String[] args) throws Exception {
int problems = 0;
if (args.length < 1 || 2 < args.length ) {
System.err.println("usage: CheckLinksAndAnchors <htmldir> [<bare-bones-boolean>]");
if (args.length < 1) {
System.err.println("usage: CheckLinksAndAnchors <htmldir> [-check-all-relative-links] [-bare-bones]");
System.exit(-1);
}
final File htmlDir = new File(args[0]);
final boolean bareBones = (2 == args.length) ? Boolean.parseBoolean(args[1]) : false;
final Set<String> options = new LinkedHashSet<>();
for (int i = 1; i < args.length; i++) {
if (! args[i].trim().isEmpty()) { // ignore blank options - maybe an ant sysprop blanked on purpose
options.add(args[i]);
}
}
final boolean bareBones = options.remove("-bare-bones");
final boolean checkAllRelativeLinks = options.remove("-check-all-relative-links");
if (! options.isEmpty()) {
for (String brokenOpt : options) {
System.err.println("CheckLinksAndAnchors: Unrecognized option: " + brokenOpt);
}
System.exit(-1);
}
final File[] pages = htmlDir.listFiles(new HtmlFileFilter());
if (0 == pages.length) {
System.err.println("No HTML Files found, wrong htmlDir? forgot to built the site?");
System.err.println("CheckLinksAndAnchors: No HTML Files found, wrong htmlDir? forgot to built the site?");
System.exit(-1);
}
final Map<String,List<File>> idsToFiles = new HashMap<>();
final Map<File,List<URI>> filesToRelativeLinks = new HashMap<>();
final Set<String> idsInMultiFiles = new HashSet<>(0);
final Set<String> idsInMultiFiles = new LinkedHashSet<>(0);
int totalLinks = 0;
int totalRelativeLinks = 0;
@ -208,8 +239,10 @@ public class CheckLinksAndAnchors { // TODO: rename this class now that it does
if (! uri.isAbsolute()) {
totalRelativeLinks++;
final String frag = uri.getFragment();
if (null == frag || "".equals(frag)) {
if ((null == frag || "".equals(frag)) && ! uri.getPath().startsWith("../")) {
// we must have a fragment for intra-page links to work correctly
// but relative links "up and out" of ref-guide (Ex: local javadocs)
// don't require them (even if checkAllRelativeLinks is set)
problems++;
System.err.println(file.toURI().toString() + " contains relative link w/o an '#anchor': " + href);
} else {
@ -252,14 +285,21 @@ public class CheckLinksAndAnchors { // TODO: rename this class now that it does
final File source = entry.getKey();
for (URI link : entry.getValue()) {
final String path = (null == link.getPath() || "".equals(link.getPath())) ? source.getName() : link.getPath();
final String frag = link.getFragment();
if ( ! idsInMultiFiles.contains(frag) ) { // skip problematic dups already reported
final File dest = new File(htmlDir, path);
if ( ! dest.exists() ) {
// this is only a problem if it's in our dir, or checkAllRelativeLinks is set...
if (checkAllRelativeLinks || ! path.startsWith("../")) {
problems++;
System.err.println("Relative link points at dest file that doesn't exist: " + link);
System.err.println(" ... source: " + source.toURI().toString());
} else if ( ( ! idsToFiles.containsKey(frag) ) || // no file contains this id, or...
}
} else {
if ( ! path.startsWith("../") ) {
// if the dest file is part of the ref guide (ie: not an "up and out" link to javadocs)
// then we validate the fragment is known and unique...
final String frag = link.getFragment();
if ( ! idsInMultiFiles.contains(frag) ) { // skip problematic dups already reported
if ( ( ! idsToFiles.containsKey(frag) ) || // no file contains this id, or...
// id exists, but not in linked file
( ! idsToFiles.get(frag).get(0).getName().equals(path) )) {
problems++;
@ -269,6 +309,8 @@ public class CheckLinksAndAnchors { // TODO: rename this class now that it does
}
}
}
}
}
System.err.println("Processed " + totalLinks + " links (" + totalRelativeLinks + " relative) to " +
idsToFiles.size() + " anchors in " + pages.length + " files");