mirror of https://github.com/apache/lucene.git
SOLR-10934: ref-guide link+anchor checking that doesn't require jekyll
This commit is contained in:
parent
1d2787464f
commit
7f033ac12b
|
@ -113,17 +113,19 @@
|
|||
|
||||
<target name="build-tools-jar" depends="resolve" description="Builds the custom java tools use use for generating some data files from page metdata">
|
||||
<mkdir dir="${build.dir}/classes"/>
|
||||
<!-- NOTE: we include the ant runtime so we can compile our customized version of the asciidoctor ant task -->
|
||||
<javac debug="yes"
|
||||
debuglevel="source,lines,vars"
|
||||
destdir="${build.dir}/classes"
|
||||
includeantruntime="false">
|
||||
includeantruntime="true">
|
||||
<compilerarg value="-Xlint:all"/>
|
||||
<classpath refid="tools-compile-classpath"/>
|
||||
<src path="tools/"/>
|
||||
</javac>
|
||||
<copy todir="${build.dir}/classes" file="tools/asciidoctor-antlib.xml" />
|
||||
<jar destfile="${build.dir}/${tools-jar-name}">
|
||||
<fileset dir="${build.dir}/classes"
|
||||
includes="**/*.class"/>
|
||||
includes="**/*.class,**/*.xml"/>
|
||||
</jar>
|
||||
</target>
|
||||
|
||||
|
@ -164,58 +166,80 @@
|
|||
</java>
|
||||
</target>
|
||||
|
||||
<target name="check-links-and-anchors" depends="build-init,build-tools-jar" description="Parse the HTML site files to check for problematic links or anchors">
|
||||
<java classname="CheckLinksAndAnchors"
|
||||
failonerror="true"
|
||||
fork="true">
|
||||
<classpath refid="tools-run-classpath"/>
|
||||
<arg value="${build.dir}/html-site"/>
|
||||
</java>
|
||||
</target>
|
||||
<macrodef name="asciidoctor-convert">
|
||||
<!-- custom macro that fills in all the defaults we care about when running asciidoctor-ant
|
||||
The primary purpose for this is to build the PDF, but we also use it to build a bare-bones
|
||||
HTML version for validating the document structure (ie: duplicate anchors, links all point to valid anchors,
|
||||
etc...) that we can't do with the generated PDF, and that we want to be able to validate
|
||||
even if the current user doesn't have jekyll installed
|
||||
-->
|
||||
<attribute name="sourceDirectory"/>
|
||||
<attribute name="sourceDocumentName"/>
|
||||
<attribute name="outputDirectory"/>
|
||||
<attribute name="backend"/>
|
||||
<attribute name="headerFooter" default="true" />
|
||||
<sequential>
|
||||
<!-- NOTE: we have our own variant on the asciidoctor-ant task, so that sourceDocumentName=""
|
||||
is treated the same as if it's unset (ie: null)
|
||||
-->
|
||||
<taskdef uri="antlib:org.asciidoctor.ant" resource="asciidoctor-antlib.xml"
|
||||
classpathref="tools-run-classpath"/>
|
||||
<asciidoctor:convert
|
||||
sourceDirectory="@{sourceDirectory}"
|
||||
sourceDocumentName="@{sourceDocumentName}"
|
||||
baseDir="${build.content.dir}"
|
||||
outputDirectory="@{outputDirectory}"
|
||||
preserveDirectories="true"
|
||||
backend="@{backend}"
|
||||
headerFooter="@{headerFooter}"
|
||||
extensions="adoc"
|
||||
sourceHighlighter="coderay"
|
||||
imagesDir="${build.content.dir}"
|
||||
doctype="book"
|
||||
safemode="unsafe">
|
||||
<attribute key="section-toc" value='' /><!-- we don't use these in the pdf -->
|
||||
<attribute key="icons" value="font" />
|
||||
<attribute key="icon-set" value="fa" />
|
||||
<attribute key="pdf-stylesDir" value="./pdf/themes"/>
|
||||
<attribute key="pdf-style" value="refguide"/>
|
||||
<attribute key="pdf-fontsDir" value="./fonts"/>
|
||||
<attribute key="figure-caption!" value='' />
|
||||
<attribute key="idprefix" value='' />
|
||||
<attribute key="idseparator" value='-' />
|
||||
<!-- attributes used in adoc files -->
|
||||
<!-- NOTE: If you add any attributes here for use in adoc files, you almost certainly need to also add
|
||||
them to the _config.yml.template file for building the jekyll site as well
|
||||
-->
|
||||
<attribute key="solr-guide-draft-status" value="${solr-guide-draft-status}" />
|
||||
<attribute key="solr-guide-version" value="${solr-guide-version}" />
|
||||
<attribute key="solr-docs-version" value="${solr-docs-version}" />
|
||||
<attribute key="solr-javadocs" value="${solr-javadocs}" />
|
||||
<attribute key="lucene-javadocs" value="${lucene-javadocs}" />
|
||||
<attribute key="build-date" value="${DSTAMP}" />
|
||||
<attribute key="build-year" value="${current.year}" />
|
||||
</asciidoctor:convert>
|
||||
</sequential>
|
||||
</macrodef>
|
||||
|
||||
|
||||
<!-- ====== PDF Build ======= -->
|
||||
<target name="build-pdf" depends="-build-raw-pdf,-reduce-pdf-size" description="Builds a PDF">
|
||||
<!-- Public PDF entry point: depends on the bare-bones HTML link/anchor validation and on the real PDF build.
     NOTE: a literal ampersand inside an attribute value must be written as an entity for the file to stay well-formed. -->
<target name="build-pdf" depends="bare-bones-html-validation,-build-pdf-and-reduce-pdf"
        description="Builds the PDF (after building &amp; validating a bare-bones html version)" />
|
||||
<target name="-build-pdf-and-reduce-pdf" depends="-build-raw-pdf,-reduce-pdf-size">
|
||||
<!-- NOTE: this does everything related to building the PDF, but skips the bare-bones-html validation -->
|
||||
<echo>Finished Building ${build.dir}/${pdf-filename}</echo>
|
||||
</target>
|
||||
<target name="-build-raw-pdf"
|
||||
depends="build-nav-data-files,resolve">
|
||||
<mkdir dir="${build.dir}/pdf-tmp"/>
|
||||
<taskdef uri="antlib:org.asciidoctor.ant" resource="org/asciidoctor/ant/antlib.xml"
|
||||
classpathref="tools-run-classpath"/>
|
||||
<asciidoctor:convert
|
||||
sourceDirectory="${build.content.dir}/pdf"
|
||||
sourceDocumentName="SolrRefGuide-all.adoc"
|
||||
baseDir="${build.content.dir}"
|
||||
outputDirectory="${build.dir}/pdf-tmp"
|
||||
backend="pdf"
|
||||
extensions="adoc"
|
||||
sourceHighlighter="coderay"
|
||||
imagesDir="${build.content.dir}"
|
||||
doctype="book"
|
||||
safemode="unsafe">
|
||||
<attribute key="section-toc" value='' /><!-- we don't use these in the pdf -->
|
||||
<attribute key="icons" value="font" />
|
||||
<attribute key="icon-set" value="fa" />
|
||||
<attribute key="pdf-stylesDir" value="./pdf/themes"/>
|
||||
<attribute key="pdf-style" value="refguide"/>
|
||||
<attribute key="pdf-fontsDir" value="./fonts"/>
|
||||
<attribute key="figure-caption!" value='' />
|
||||
<attribute key="idprefix" value='' />
|
||||
<attribute key="idseparator" value='-' />
|
||||
<!-- attributes used in adoc files -->
|
||||
<!-- NOTE: If you add any attributes here for use in adoc files, you almost certainly need to also add
|
||||
them to the _config.yml.template file for building the jekyll site as well
|
||||
-->
|
||||
<attribute key="solr-guide-draft-status" value="${solr-guide-draft-status}" />
|
||||
<attribute key="solr-guide-version" value="${solr-guide-version}" />
|
||||
<attribute key="solr-docs-version" value="${solr-docs-version}" />
|
||||
<attribute key="solr-javadocs" value="${solr-javadocs}" />
|
||||
<attribute key="lucene-javadocs" value="${lucene-javadocs}" />
|
||||
<attribute key="build-date" value="${DSTAMP}" />
|
||||
<attribute key="build-year" value="${current.year}" />
|
||||
</asciidoctor:convert>
|
||||
<asciidoctor-convert sourceDirectory="${build.content.dir}/pdf"
|
||||
sourceDocumentName="SolrRefGuide-all.adoc"
|
||||
outputDirectory="${build.dir}/pdf-tmp"
|
||||
backend="pdf"
|
||||
/>
|
||||
<move file="${build.dir}/pdf-tmp/SolrRefGuide-all.pdf" tofile="${build.dir}/pdf-tmp/RAW-${pdf-filename}" />
|
||||
</target>
|
||||
|
||||
<target name="-reduce-pdf-size" depends="build-init,build-tools-jar">
|
||||
<java classname="ReducePDFSize"
|
||||
failonerror="true"
|
||||
|
@ -232,24 +256,61 @@
|
|||
Builds site with Jekyll.
|
||||
This (for now) assumes that Jekyll (http://jekyllrb.com) is installed locally. -->
|
||||
<target name="build-site"
|
||||
depends="-build-site,check-links-and-anchors"
|
||||
depends="-build-site"
|
||||
description="Builds an HTML Site w/Jekyll and verifies the anchors+links are valid" >
|
||||
<java classname="CheckLinksAndAnchors"
|
||||
failonerror="true"
|
||||
fork="true">
|
||||
<classpath refid="tools-run-classpath"/>
|
||||
<arg value="${build.dir}/html-site"/>
|
||||
</java>
|
||||
<echo>Ready to browse site: ${build.dir}/html-site/${main-page}.html</echo>
|
||||
</target>
|
||||
<target name="-build-site"
|
||||
depends="build-init,build-nav-data-files"
|
||||
description="Builds an HTML Site w/Jekyll">
|
||||
depends="build-init,build-nav-data-files" >
|
||||
<echo>Running Jekyll...</echo>
|
||||
<exec executable="jekyll" dir="${build.content.dir}">
|
||||
<arg value="build"/>
|
||||
</exec>
|
||||
</target>
|
||||
|
||||
<!-- ======= HTML Bare Bones Conversion =======
|
||||
Does a very raw conversion of the adoc files to HTML for the purpose of link & anchor checking
|
||||
|
||||
Unlike the "HTML Site Build" above, this does *NOT* require Jekyll, and can be done entirely
|
||||
with ivy deps fetched automatically (just like the PDF)
|
||||
-->
|
||||
<target name="bare-bones-html-validation" depends="build-init,build-nav-data-files"
        description="Builds (w/o Jekyll) a very simple html version of the guide and runs link/anchor validation on it">

  <!-- recreate the output directory from scratch before converting -->
  <delete dir="${build.dir}/bare-bones-html"/>
  <mkdir dir="${build.dir}/bare-bones-html"/>

  <!-- sourceDocumentName is deliberately empty: our customized asciidoctor task treats "" the same as unset -->
  <asciidoctor-convert sourceDirectory="${build.content.dir}"
                       sourceDocumentName=""
                       outputDirectory="${build.dir}/bare-bones-html"
                       headerFooter="false"
                       backend="html5"
                       />

  <java classname="CheckLinksAndAnchors"
        failonerror="true"
        fork="true">
    <classpath refid="tools-run-classpath"/>
    <arg value="${build.dir}/bare-bones-html"/>
    <!-- optional second argument: "true" switches the checker into its bare-bones mode -->
    <arg value="true" />
  </java>

  <!-- the ampersand is written as an entity so the build file stays well-formed XML -->
  <echo>Validated Links &amp; Anchors via: ${build.dir}/bare-bones-html/</echo>
</target>
|
||||
|
||||
<target name="default"
|
||||
description="Builds both a PDF and HTML versions of the ref guide"
|
||||
depends="build-pdf,build-site">
|
||||
depends="-build-pdf-and-reduce-pdf,build-site">
|
||||
<!-- NOTE: we don't depend on build-pdf because then we'd also get the bare-bones HTML and do
|
||||
link validation twice -->
|
||||
<echo>PDF: ${build.dir}/${pdf-filename}</echo>
|
||||
<echo>SITE: ${build.dir}/html-site/${main-page}.html</echo>
|
||||
</target>
|
||||
|
||||
|
||||
|
||||
</project>
|
||||
|
|
|
@ -45,18 +45,41 @@ import org.jsoup.select.Elements;
|
|||
import org.jsoup.select.NodeVisitor;
|
||||
|
||||
/**
|
||||
* Check various things regarding links in the generated HTML site.
|
||||
* Check various things regarding anchors & links in the generated HTML site.
|
||||
* <p>
|
||||
* Asciidoctor doesn't do a good job of rectifying situations where multiple documents are included in one
|
||||
* massive (PDF) document and may have identical anchors (either explicitly defined, or implicitly defined because of
|
||||
* section headings). Asciidoctor also doesn't support linking directly to another (included) document by name,
|
||||
* unless there is an explicit '#fragement' used inthe link.
|
||||
* unless there is an explicit '#fragment' used in the link.
|
||||
* </p>
|
||||
* <p>
|
||||
* This tool parses the generated HTML site, looking for these situations in order to fail the build -- since the
|
||||
* equivilent PDF will be broken. It also does sme general check of the relative URLs to ensure the destination
|
||||
* equivalent PDF will be broken. It also does some general checks of the relative URLs to ensure the destination
|
||||
* files/anchors actually exist.
|
||||
* </p>
|
||||
* <p>
|
||||
* This tool supports 2 modes, depending on whether you want to run it against the HTML generated by Jekyll, or
|
||||
* the "bare bones" HTML generated directly by asciidoctor...
|
||||
* </p>
|
||||
* <ul>
|
||||
* <li>Jekyll Mode:
|
||||
* <ul>
|
||||
* <li><code>CheckLinksAndAnchors html-dir-name/ [false]</code></li>
|
||||
* <li>Requires all html pages have a "main-content" div; ignores all links & anchors that
|
||||
* are <em>not</em> descendants of this div (to exclude redundant template based header, footer, & sidebar links)
|
||||
* </li>
|
||||
* <li>Expects that the <code><body/></code> tag will have an <code>id</code> matching the page shortname.</li>
|
||||
* </ul>
|
||||
* </li>
|
||||
* <li>Bare Bones Mode:
|
||||
* <ul>
|
||||
* <li><code>CheckLinksAndAnchors html-dir-name/ true</code></li>
|
||||
* <li>Checks all links & anchors in the page.</li>
|
||||
* <li>"Fakes" the existence of a <code><body id="..."></code> tag containing the page shortname.</li>
|
||||
* </ul>
|
||||
* </li>
|
||||
* </ul>
|
||||
*
|
||||
*
|
||||
* TODO: build a list of all known external links so that some other tool could (optionally) ping them all for 200 status?
|
||||
*
|
||||
|
@ -74,11 +97,12 @@ public class CheckLinksAndAnchors {
|
|||
public static void main(String[] args) throws Exception {
|
||||
int problems = 0;
|
||||
|
||||
if (args.length != 1) {
|
||||
System.err.println("usage: CheckLinksAndAnchors <htmldir>");
|
||||
if (args.length < 1 || 2 < args.length ) {
|
||||
System.err.println("usage: CheckLinksAndAnchors <htmldir> [<bare-bones-boolean>]");
|
||||
System.exit(-1);
|
||||
}
|
||||
final File htmlDir = new File(args[0]);
|
||||
final boolean bareBones = (2 == args.length) ? Boolean.parseBoolean(args[1]) : false;
|
||||
|
||||
final File[] pages = htmlDir.listFiles(new HtmlFileFilter());
|
||||
if (0 == pages.length) {
|
||||
|
@ -89,6 +113,9 @@ public class CheckLinksAndAnchors {
|
|||
final Map<String,List<File>> idsToFiles = new HashMap<>();
|
||||
final Map<File,List<URI>> filesToRelativeLinks = new HashMap<>();
|
||||
final Set<String> idsInMultiFiles = new HashSet<>(0);
|
||||
|
||||
int totalLinks = 0;
|
||||
int totalRelativeLinks = 0;
|
||||
|
||||
for (File file : pages) {
|
||||
//System.out.println("input File URI: " + file.toURI().toString());
|
||||
|
@ -99,25 +126,47 @@ public class CheckLinksAndAnchors {
|
|||
|
||||
final String fileContents = readFile(file.getPath());
|
||||
final Document doc = Jsoup.parse(fileContents);
|
||||
// we only care about class='main-content' -- we don't want to worry
|
||||
|
||||
// For Jekyll, we only care about class='main-content' -- we don't want to worry
|
||||
// about ids/links duplicated in the header/footer of every page,
|
||||
final Element mainContent = doc.select(".main-content").first();
|
||||
final String mainContentSelector = bareBones ? "body" : ".main-content";
|
||||
final Element mainContent = doc.select(mainContentSelector).first();
|
||||
if (mainContent == null) {
|
||||
throw new RuntimeException(file.getName() + " has no main-content div");
|
||||
throw new RuntimeException(file.getName() + " has no main content: " + mainContentSelector);
|
||||
}
|
||||
|
||||
// Add all of the IDs in (the main-content of) this doc to idsToFiles (and idsInMultiFiles if needed)
|
||||
final Elements nodesWithIds = mainContent.select("[id]");
|
||||
// NOTE: add <body> to the nodesWithIds so we check the main section anchor as well
|
||||
nodesWithIds.addAll(doc.select("body[id]"));
|
||||
|
||||
if (bareBones) {
|
||||
// It's a pain in the ass to customize the HTML output structure of asciidoctor's bare-bones html5 backend
|
||||
// so instead we "fake" that the body tag contains the attribute we use in jekyll
|
||||
// (and what gets added explicitly to each top level section in the PDF)
|
||||
nodesWithIds.add(new Element(Tag.valueOf("body"), "").attr("id", file.getName().replaceAll("\\.html$","")));
|
||||
} else {
|
||||
// We have to add Jekyll's <body> to the nodesWithIds so we check the main section anchor as well
|
||||
// since nodesWithIds was selected only from within the main-content element
|
||||
nodesWithIds.addAll(doc.select("body[id]"));
|
||||
}
|
||||
|
||||
boolean foundPreamble = false;
|
||||
for (Element node : nodesWithIds) {
|
||||
final String id = node.id();
|
||||
assert null != id;
|
||||
assert 0 != id.length();
|
||||
|
||||
// special case ids that we ignore
|
||||
// special case id: we ignore the first 'preamble' because
|
||||
// it's part of the core markup that asciidoctor always uses
|
||||
// if we find it a second time in a single page, fail with a special error...
|
||||
if (id.equals("preamble")) {
|
||||
continue;
|
||||
if (foundPreamble) {
|
||||
problems++;
|
||||
System.err.println(file.toURI().toString() +
|
||||
" contains 'preamble' anchor, this is special in jekyll and must not be used in content.");
|
||||
} else {
|
||||
foundPreamble = true;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
if (idsToFiles.containsKey(id)) {
|
||||
|
@ -131,6 +180,7 @@ public class CheckLinksAndAnchors {
|
|||
// check for (relative) links that don't include a fragment
|
||||
final Elements links = mainContent.select("a[href]");
|
||||
for (Element link : links) {
|
||||
totalLinks++;
|
||||
final String href = link.attr("href");
|
||||
if (0 == href.length()) {
|
||||
problems++;
|
||||
|
@ -139,6 +189,7 @@ public class CheckLinksAndAnchors {
|
|||
try {
|
||||
final URI uri = new URI(href);
|
||||
if (! uri.isAbsolute()) {
|
||||
totalRelativeLinks++;
|
||||
final String frag = uri.getFragment();
|
||||
if (null == frag || "".equals(frag)) {
|
||||
// we must have a fragment for intra-page links to work correctly
|
||||
|
@ -200,7 +251,8 @@ public class CheckLinksAndAnchors {
|
|||
}
|
||||
}
|
||||
|
||||
|
||||
System.err.println("Processed " + totalLinks + " links (" + totalRelativeLinks + " relative) to " +
|
||||
idsToFiles.size() + " anchors in " + pages.length + " files");
|
||||
if (0 < problems) {
|
||||
System.err.println("Total of " + problems + " problems found");
|
||||
System.exit(-1);
|
||||
|
|
|
@ -0,0 +1,34 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
import org.asciidoctor.ant.AsciidoctorAntTask;
|
||||
|
||||
/**
|
||||
* Customized version of the default AsciidoctorAntTask
|
||||
* To deal with the fact that we want sourceDocumentName="" treated the same as unspecified (ie: null)
|
||||
* in order to be able to wrap in a macro with defaults
|
||||
*/
|
||||
public class CustomizedAsciidoctorAntTask extends AsciidoctorAntTask {
|
||||
@SuppressWarnings("UnusedDeclaration")
|
||||
public void setSourceDocumentName(String sourceDocumentName) {
|
||||
if ("".equals(sourceDocumentName)) {
|
||||
sourceDocumentName = null;
|
||||
}
|
||||
super.setSourceDocumentName(sourceDocumentName);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -0,0 +1,22 @@
|
|||
<?xml version="1.0"?>
|
||||
<!--
|
||||
Licensed to the Apache Software Foundation (ASF) under one
|
||||
or more contributor license agreements. See the NOTICE file
|
||||
distributed with this work for additional information
|
||||
regarding copyright ownership. The ASF licenses this file
|
||||
to you under the Apache License, Version 2.0 (the
|
||||
"License"); you may not use this file except in compliance
|
||||
with the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing,
|
||||
software distributed under the License is distributed on an
|
||||
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
KIND, either express or implied. See the License for the
|
||||
specific language governing permissions and limitations
|
||||
under the License.
|
||||
-->
|
||||
<antlib>
  <!-- Registers our customized asciidoctor task under the element name "convert" -->
  <typedef classname="CustomizedAsciidoctorAntTask"
           name="convert"></typedef>
</antlib>
|
Loading…
Reference in New Issue