From 7f033ac12bb290b2cbf5e43672932c31e8b0061a Mon Sep 17 00:00:00 2001 From: Chris Hostetter Date: Fri, 3 Nov 2017 10:44:03 -0700 Subject: [PATCH] SOLR-10934: ref-guide link+anchor checking that doesn't require jekyll --- solr/solr-ref-guide/build.xml | 159 ++++++++++++------ .../tools/CheckLinksAndAnchors.java | 78 +++++++-- .../tools/CustomizedAsciidoctorAntTask.java | 34 ++++ .../tools/asciidoctor-antlib.xml | 22 +++ 4 files changed, 231 insertions(+), 62 deletions(-) create mode 100644 solr/solr-ref-guide/tools/CustomizedAsciidoctorAntTask.java create mode 100644 solr/solr-ref-guide/tools/asciidoctor-antlib.xml diff --git a/solr/solr-ref-guide/build.xml b/solr/solr-ref-guide/build.xml index 1ce3778e9f0..6f989b91194 100644 --- a/solr/solr-ref-guide/build.xml +++ b/solr/solr-ref-guide/build.xml @@ -113,17 +113,19 @@ + + includeantruntime="true"> + + includes="**/*.class,**/*.xml"/> @@ -164,58 +166,80 @@ - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - + + + Finished Building ${build.dir}/${pdf-filename} - - - - - - - - - - - - - - - - - - - - - + + + + + + Ready to browse site: ${build.dir}/html-site/${main-page}.html + depends="build-init,build-nav-data-files" > Running Jekyll... + + + + + + + + + + + + + + Validated Links & Anchors via: ${build.dir}/bare-bones-html/ + + depends="-build-pdf-and-reduce-pdf,build-site"> + PDF: ${build.dir}/${pdf-filename} SITE: ${build.dir}/html-site/${main-page}.html + + diff --git a/solr/solr-ref-guide/tools/CheckLinksAndAnchors.java b/solr/solr-ref-guide/tools/CheckLinksAndAnchors.java index c5dcac28f07..0dc88d70f17 100644 --- a/solr/solr-ref-guide/tools/CheckLinksAndAnchors.java +++ b/solr/solr-ref-guide/tools/CheckLinksAndAnchors.java @@ -45,18 +45,41 @@ import org.jsoup.select.Elements; import org.jsoup.select.NodeVisitor; /** - * Check various things regarding links in the generated HTML site. + * Check various things regarding anchors & links in the generated HTML site. *

* Asciidoctor doesn't do a good job of rectifying situations where multiple documents are included in one * massive (PDF) document may have identical anchors (either explicitly defined, or implicitly defined because of * section headings). Asciidoctor also doesn't support linking directly to another (included) document by name, - * unless there is an explicit '#fragement' used inthe link. + * unless there is an explicit '#fragment' used in the link. *

*

* This tool parses the generated HTML site, looking for these situations in order to fail the build -- since the - * equivilent PDF will be broken. It also does sme general check of the relative URLs to ensure the destination + * equivalent PDF will be broken. It also does some general check of the relative URLs to ensure the destination * files/anchors actaully exist. *

+ *

+ * This tool supports 2 modes, depending on whether you want to run it against the HTML generated by Jekyll, or + * the "bare bones" HTML generated directly by asciidoctor... + *

+ *
    + *
  • Jekyll Mode: + *
      + *
    • CheckLinksAndAnchors html-dir-name/ [false]
    • + *
    • Requires all html pages have a "main-content" div; ignores all links & anchors that + * are not descendants of this div (to exclude redundant template based header, footer, & sidebar links) + *
    • + *
    • Expects that the <body/> tag will have an id matching the page shortname.
    • + *
    + *
  • + *
  • Bare Bones Mode: + *
      + *
    • CheckLinksAndAnchors html-dir-name/ true
    • + *
    • Checks all links & anchors in the page.
    • + *
    • "Fakes" the existence of a <body id="..."> tag containing the page shortname.
    • + *
    + *
  • + *
+ * * * TODO: build a list of all known external links so that some other tool could (optionally) ping them all for 200 status? * @@ -74,11 +97,12 @@ public class CheckLinksAndAnchors { public static void main(String[] args) throws Exception { int problems = 0; - if (args.length != 1) { - System.err.println("usage: CheckLinksAndAnchors "); + if (args.length < 1 || 2 < args.length ) { + System.err.println("usage: CheckLinksAndAnchors []"); System.exit(-1); } final File htmlDir = new File(args[0]); + final boolean bareBones = (2 == args.length) ? Boolean.parseBoolean(args[1]) : false; final File[] pages = htmlDir.listFiles(new HtmlFileFilter()); if (0 == pages.length) { @@ -89,6 +113,9 @@ public class CheckLinksAndAnchors { final Map> idsToFiles = new HashMap<>(); final Map> filesToRelativeLinks = new HashMap<>(); final Set idsInMultiFiles = new HashSet<>(0); + + int totalLinks = 0; + int totalRelativeLinks = 0; for (File file : pages) { //System.out.println("input File URI: " + file.toURI().toString()); @@ -99,25 +126,47 @@ public class CheckLinksAndAnchors { final String fileContents = readFile(file.getPath()); final Document doc = Jsoup.parse(fileContents); - // we only care about class='main-content' -- we don't want to worry + + // For Jekyll, we only care about class='main-content' -- we don't want to worry // about ids/links duplicated in the header/footer of every page, - final Element mainContent = doc.select(".main-content").first(); + final String mainContentSelector = bareBones ? 
"body" : ".main-content"; + final Element mainContent = doc.select(mainContentSelector).first(); if (mainContent == null) { - throw new RuntimeException(file.getName() + " has no main-content div"); + throw new RuntimeException(file.getName() + " has no main content: " + mainContentSelector); } // Add all of the IDs in (the main-content of) this doc to idsToFiles (and idsInMultiFiles if needed) final Elements nodesWithIds = mainContent.select("[id]"); - // NOTE: add to the nodesWithIds so we check the main section anchor as well - nodesWithIds.addAll(doc.select("body[id]")); + + if (bareBones) { + // It's a pain in the ass to customize the HTML output structure asciidoctor's bare-bones html5 backend + // so instead we "fake" that the body tag contains the attribute we use in jekyll + // (and what gets added explicitly to each top level section in the PDF) + nodesWithIds.add(new Element(Tag.valueOf("body"), "").attr("id", file.getName().replaceAll("\\.html$",""))); + } else { + // We have to add Jekyll's to the nodesWithIds so we check the main section anchor as well + // since we've already + nodesWithIds.addAll(doc.select("body[id]")); + } + + boolean foundPreamble = false; for (Element node : nodesWithIds) { final String id = node.id(); assert null != id; assert 0 != id.length(); - // special case ids that we ignore + // special case id: we ignore the first 'preamble' because + // it's part of the core markup that asciidoctor always uses + // if we find it a second time in a single page, fail with a special error... 
if (id.equals("preamble")) { - continue; + if (foundPreamble) { + problems++; + System.err.println(file.toURI().toString() + + " contains 'preamble' anchor, this is special in jekyll and must not be used in content."); + } else { + foundPreamble = true; + continue; + } } if (idsToFiles.containsKey(id)) { @@ -131,6 +180,7 @@ public class CheckLinksAndAnchors { // check for (relative) links that don't include a fragment final Elements links = mainContent.select("a[href]"); for (Element link : links) { + totalLinks++; final String href = link.attr("href"); if (0 == href.length()) { problems++; @@ -139,6 +189,7 @@ public class CheckLinksAndAnchors { try { final URI uri = new URI(href); if (! uri.isAbsolute()) { + totalRelativeLinks++; final String frag = uri.getFragment(); if (null == frag || "".equals(frag)) { // we must have a fragment for intra-page links to work correctly @@ -200,7 +251,8 @@ public class CheckLinksAndAnchors { } } - + System.err.println("Processed " + totalLinks + " links (" + totalRelativeLinks + " relative) to " + + idsToFiles.size() + " anchors in " + pages.length + " files"); if (0 < problems) { System.err.println("Total of " + problems + " problems found"); System.exit(-1); diff --git a/solr/solr-ref-guide/tools/CustomizedAsciidoctorAntTask.java b/solr/solr-ref-guide/tools/CustomizedAsciidoctorAntTask.java new file mode 100644 index 00000000000..5c1d700676d --- /dev/null +++ b/solr/solr-ref-guide/tools/CustomizedAsciidoctorAntTask.java @@ -0,0 +1,34 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +import org.asciidoctor.ant.AsciidoctorAntTask; + +/** + * Customized version of the default AsciidoctorAntTask + * To deal with the fact that we want sourceDocumentName="" treated the same as unspecified (ie: null) + * in order to be able to wrap in a macro with defaults + */ +public class CustomizedAsciidoctorAntTask extends AsciidoctorAntTask { + @SuppressWarnings("UnusedDeclaration") + public void setSourceDocumentName(String sourceDocumentName) { + if ("".equals(sourceDocumentName)) { + sourceDocumentName = null; + } + super.setSourceDocumentName(sourceDocumentName); + } +} + + diff --git a/solr/solr-ref-guide/tools/asciidoctor-antlib.xml b/solr/solr-ref-guide/tools/asciidoctor-antlib.xml new file mode 100644 index 00000000000..d67e3e15e33 --- /dev/null +++ b/solr/solr-ref-guide/tools/asciidoctor-antlib.xml @@ -0,0 +1,22 @@ + + + + +