LUCENE-7727: Replace end-of-life Markdown parser "Pegdown" with "Flexmark" for compatibility with Java 9

This commit is contained in:
Uwe Schindler 2017-03-02 17:43:44 +01:00
parent 3087eb5006
commit 707d7b91e8
6 changed files with 53 additions and 42 deletions

View File

@ -89,7 +89,7 @@
<fail message="The Beast only works inside of individual modules"/>
</target>
<target name="documentation" depends="resolve-pegdown" description="Generate Lucene and Solr Documentation">
<target name="documentation" depends="resolve-markdown" description="Generate Lucene and Solr Documentation">
<subant target="documentation" inheritall="false" failonerror="true">
<fileset dir="lucene" includes="build.xml" />
<fileset dir="solr" includes="build.xml" />
@ -97,7 +97,7 @@
</subant>
</target>
<target name="documentation-lint" depends="resolve-pegdown,-ecj-javadoc-lint-unsupported,-ecj-resolve" description="Validates the generated documentation (HTML errors, broken links,...)">
<target name="documentation-lint" depends="resolve-markdown,-ecj-javadoc-lint-unsupported,-ecj-resolve" description="Validates the generated documentation (HTML errors, broken links,...)">
<subant target="documentation-lint" inheritall="false" failonerror="true">
<fileset dir="lucene" includes="build.xml" />
<fileset dir="solr" includes="build.xml" />
@ -319,7 +319,7 @@
<delete failonerror="true" dir="${maven-build-dir}/"/>
</target>
<target name="generate-maven-artifacts" depends="resolve,resolve-groovy,resolve-pegdown,install-maven-tasks"
<target name="generate-maven-artifacts" depends="resolve,resolve-groovy,resolve-markdown,install-maven-tasks"
description="Generate Maven Artifacts for Lucene and Solr">
<property name="maven.dist.dir" location="dist/maven" />
<mkdir dir="${maven.dist.dir}" />
@ -543,7 +543,7 @@ File | Project Structure | Platform Settings | SDKs):
</target>
<target name="nightly-smoke" description="Builds an unsigned release and smoke tests it"
depends="clean,resolve-groovy,resolve-pegdown,install-maven-tasks">
depends="clean,resolve-groovy,resolve-markdown,install-maven-tasks">
<fail message="To run nightly smoke, the JDK must be exactly Java 1.8, was: ${java.specification.version}">
<condition>
<not><equals arg1="${java.specification.version}" arg2="1.8"/></not>
@ -788,7 +788,7 @@ Test args: [${args}]</echo>
</antcall>
</target>
<target name="jenkins-maven-nightly" depends="-print-java-info,clean,clean-maven-build,resolve-groovy,resolve-pegdown,install-maven-tasks">
<target name="jenkins-maven-nightly" depends="-print-java-info,clean,clean-maven-build,resolve-groovy,resolve-markdown,install-maven-tasks">
<!-- step 1: build, install, deploy, and validate ANT-generated maven artifacts: -->
<antcall>
<param name="is.jenkins.build" value="true"/>

View File

@ -245,6 +245,9 @@ Build
* LUCENE-7726: Fix HTML entity bugs in Javadocs to be able to build with
Java 9. (Uwe Schindler, Hossman)
* LUCENE-7727: Replace end-of-life Markdown parser "Pegdown" with "Flexmark"
for compatibility with Java 9. (Uwe Schindler)
Other
* LUCENE-7666: Fix typos in lucene-join package info javadoc.

View File

@ -208,7 +208,7 @@
<modules-crawl target="-ecj-javadoc-lint"/>
</target>
<target name="process-webpages" depends="resolve-pegdown">
<target name="process-webpages" depends="resolve-markdown">
<makeurl property="process-webpages.buildfiles" separator="|">
<fileset dir="." includes="**/build.xml" excludes="build.xml,analysis/*,build/**,tools/**,site/**"/>
</makeurl>
@ -237,10 +237,10 @@
<param name="defaultCodec" expression="${defaultCodec}"/>
</xslt>
<pegdown todir="${javadoc.dir}">
<markdown todir="${javadoc.dir}">
<fileset dir="." includes="MIGRATE.txt,JRE_VERSION_MIGRATION.txt,SYSTEM_REQUIREMENTS.txt"/>
<globmapper from="*.txt" to="*.html"/>
</pegdown>
</markdown>
<copy todir="${javadoc.dir}">
<fileset dir="site/html"/>

View File

@ -2410,61 +2410,74 @@ ${ant.project.name}.test.dependencies=${test.classpath.list}
</forbidden-apis>
</target>
<target name="resolve-pegdown" unless="pegdown.loaded" depends="resolve-groovy">
<ivy:cachepath organisation="org.pegdown" module="pegdown" revision="1.6.0"
inline="true" conf="default" transitive="true" pathid="pegdown.classpath"/>
<groovy classpathref="pegdown.classpath"><![CDATA[
<target name="resolve-markdown" unless="markdown.loaded" depends="resolve-groovy">
<property name="flexmark.version" value="0.16.1"/>
<ivy:cachepath transitive="true" pathid="markdown.classpath">
<ivy:dependency org="com.vladsch.flexmark" name="flexmark" rev="${flexmark.version}" conf="default" />
<ivy:dependency org="com.vladsch.flexmark" name="flexmark-ext-autolink" rev="${flexmark.version}" conf="default" />
<ivy:dependency org="com.vladsch.flexmark" name="flexmark-ext-abbreviation" rev="${flexmark.version}" conf="default" />
</ivy:cachepath>
<groovy classpathref="markdown.classpath"><![CDATA[
import org.apache.tools.ant.AntTypeDefinition;
import org.apache.tools.ant.ComponentHelper;
import org.apache.tools.ant.filters.TokenFilter.ChainableReaderFilter;
import org.pegdown.PegDownProcessor;
import org.pegdown.Extensions;
import org.pegdown.FastEncoder;
public final class PegDownFilter extends ChainableReaderFilter {
import com.vladsch.flexmark.ast.Node;
import com.vladsch.flexmark.ast.Heading;
import com.vladsch.flexmark.html.HtmlRenderer;
import com.vladsch.flexmark.parser.Parser;
import com.vladsch.flexmark.parser.ParserEmulationProfile;
import com.vladsch.flexmark.util.html.Escaping;
import com.vladsch.flexmark.util.options.MutableDataSet;
import com.vladsch.flexmark.ext.abbreviation.AbbreviationExtension;
import com.vladsch.flexmark.ext.autolink.AutolinkExtension;
public final class MarkdownFilter extends ChainableReaderFilter {
@Override
public String filter(String markdownSource) {
PegDownProcessor processor = new PegDownProcessor(
Extensions.ABBREVIATIONS | Extensions.AUTOLINKS |
Extensions.FENCED_CODE_BLOCKS | Extensions.SMARTS
);
MutableDataSet options = new MutableDataSet();
options.setFrom(ParserEmulationProfile.MARKDOWN);
options.set(Parser.EXTENSIONS, [ AbbreviationExtension.create(), AutolinkExtension.create() ]);
options.set(HtmlRenderer.RENDER_HEADER_ID, true);
options.set(HtmlRenderer.MAX_TRAILING_BLANK_LINES, 0);
Node parsed = Parser.builder(options).build().parse(markdownSource);
StringBuilder html = new StringBuilder('<html>\n<head>\n');
// match the first heading in markdown and use as title:
markdownSource.find(~/(?m)^#+\s*(.+)$/) {
match, title -> html.append('<title>').append(FastEncoder.encode(title)).append('</title>\n');
CharSequence title = parsed.getFirstChildAny(Heading.class)?.getText();
if (title != null) {
html.append('<title>').append(Escaping.escapeHtml(title, false)).append('</title>\n');
}
html.append('<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">\n')
.append('</head>\n<body>\n')
.append(processor.markdownToHtml(markdownSource))
.append('\n</body>\n</html>\n');
.append('</head>\n<body>\n');
HtmlRenderer.builder(options).build().render(parsed, html);
html.append('</body>\n</html>\n');
return html;
}
}
AntTypeDefinition t = new AntTypeDefinition();
t.setName('pegdownfilter');
t.setClass(PegDownFilter.class);
t.setName('markdownfilter');
t.setClass(MarkdownFilter.class);
ComponentHelper.getComponentHelper(project).addDataTypeDefinition(t);
]]></groovy>
<property name="pegdown.loaded" value="true"/>
<property name="markdown.loaded" value="true"/>
</target>
<!-- PEGDOWN macro: Before using depend on the target "resolve-pegdown" -->
<!-- markdown macro: before using it, make the calling target depend on the target "resolve-markdown" -->
<macrodef name="pegdown">
<macrodef name="markdown">
<attribute name="todir"/>
<attribute name="flatten" default="false"/>
<attribute name="overwrite" default="false"/>
<element name="nested" optional="false" implicit="true"/>
<sequential>
<copy todir="@{todir}" flatten="@{flatten}" overwrite="@{overwrite}" verbose="true"
preservelastmodified="false" encoding="UTF-8" taskname="pegdown"
preservelastmodified="false" encoding="UTF-8" taskname="markdown"
>
<filterchain>
<tokenfilter>
<filetokenizer/>
<replaceregex pattern="\b(LUCENE|SOLR)\-\d+\b" replace="[\0](https://issues.apache.org/jira/browse/\0)" flags="gs"/>
<pegdownfilter/>
<markdownfilter/>
</tokenfilter>
</filterchain>
<nested/>

View File

@ -202,9 +202,6 @@
<replaceregex pattern="src\s*=\s*&quot;images/" replace="src=&quot;/solr/assets/images/" flags="gs"/>
<!-- Redirect to the website's version-specific system requirements page -->
<replaceregex pattern="\(SYSTEM_REQUIREMENTS.html\)" replace="(/solr/api/SYSTEM_REQUIREMENTS.html)" flags="gs"/>
<!-- Remove name anchors. Unlike pegdown, the website markdown processor automatically attaches id-s to headers.
Exception: don't remove the "techproducts" anchor, because it has no following header. -->
<replaceregex pattern="&lt;a\s+name\s*=\s*&quot;(?!techproducts)[^&quot;]+&quot;\s*&gt;\s*&lt;/a&gt;\s*" replace="" flags="gs"/>
</tokenfilter>
</filterchain>
</copy>
@ -225,7 +222,7 @@
</copy>
</target>
<target name="process-webpages" depends="define-lucene-javadoc-url,resolve-pegdown">
<target name="process-webpages" depends="define-lucene-javadoc-url,resolve-markdown">
<makeurl property="process-webpages.buildfiles" separator="|">
<fileset dir="." includes="core/build.xml,test-framework/build.xml,solrj/build.xml,contrib/**/build.xml"/>
</makeurl>
@ -244,10 +241,10 @@
<param name="luceneJavadocUrl" expression="${lucene.javadoc.url}"/>
</xslt>
<pegdown todir="${javadoc.dir}">
<markdown todir="${javadoc.dir}">
<fileset dir="site" includes="**/*.mdtext"/>
<globmapper from="*.mdtext" to="*.html"/>
</pegdown>
</markdown>
<copy todir="${javadoc.dir}">
<fileset dir="site/assets" />

View File

@ -263,7 +263,6 @@ Execute the following command to delete a specific document:
bin/post -c gettingstarted -d "<delete><id>SP2514N</id></delete>"
<a name="searching"></a>
## Searching
Solr can be queried via REST clients, cURL, wget, Chrome POSTMAN, etc., as well as via the native clients available for
@ -594,7 +593,6 @@ Here's a Unix script for convenient copying and pasting in order to run the key
bin/solr healthcheck -c gettingstarted
date
<a name="cleanup"></a>
## Cleanup
As you work through this guide, you may want to stop Solr and reset the environment back to the starting point.