LUCENE-7727: Replace end-of-life Markdown parser "Pegdown" by "Flexmark" for compatibility with Java 9

This commit is contained in:
Uwe Schindler 2017-03-02 17:43:44 +01:00
parent 3087eb5006
commit 707d7b91e8
6 changed files with 53 additions and 42 deletions

View File

@ -89,7 +89,7 @@
<fail message="The Beast only works inside of individual modules"/> <fail message="The Beast only works inside of individual modules"/>
</target> </target>
<target name="documentation" depends="resolve-pegdown" description="Generate Lucene and Solr Documentation"> <target name="documentation" depends="resolve-markdown" description="Generate Lucene and Solr Documentation">
<subant target="documentation" inheritall="false" failonerror="true"> <subant target="documentation" inheritall="false" failonerror="true">
<fileset dir="lucene" includes="build.xml" /> <fileset dir="lucene" includes="build.xml" />
<fileset dir="solr" includes="build.xml" /> <fileset dir="solr" includes="build.xml" />
@ -97,7 +97,7 @@
</subant> </subant>
</target> </target>
<target name="documentation-lint" depends="resolve-pegdown,-ecj-javadoc-lint-unsupported,-ecj-resolve" description="Validates the generated documentation (HTML errors, broken links,...)"> <target name="documentation-lint" depends="resolve-markdown,-ecj-javadoc-lint-unsupported,-ecj-resolve" description="Validates the generated documentation (HTML errors, broken links,...)">
<subant target="documentation-lint" inheritall="false" failonerror="true"> <subant target="documentation-lint" inheritall="false" failonerror="true">
<fileset dir="lucene" includes="build.xml" /> <fileset dir="lucene" includes="build.xml" />
<fileset dir="solr" includes="build.xml" /> <fileset dir="solr" includes="build.xml" />
@ -319,7 +319,7 @@
<delete failonerror="true" dir="${maven-build-dir}/"/> <delete failonerror="true" dir="${maven-build-dir}/"/>
</target> </target>
<target name="generate-maven-artifacts" depends="resolve,resolve-groovy,resolve-pegdown,install-maven-tasks" <target name="generate-maven-artifacts" depends="resolve,resolve-groovy,resolve-markdown,install-maven-tasks"
description="Generate Maven Artifacts for Lucene and Solr"> description="Generate Maven Artifacts for Lucene and Solr">
<property name="maven.dist.dir" location="dist/maven" /> <property name="maven.dist.dir" location="dist/maven" />
<mkdir dir="${maven.dist.dir}" /> <mkdir dir="${maven.dist.dir}" />
@ -543,7 +543,7 @@ File | Project Structure | Platform Settings | SDKs):
</target> </target>
<target name="nightly-smoke" description="Builds an unsigned release and smoke tests it" <target name="nightly-smoke" description="Builds an unsigned release and smoke tests it"
depends="clean,resolve-groovy,resolve-pegdown,install-maven-tasks"> depends="clean,resolve-groovy,resolve-markdown,install-maven-tasks">
<fail message="To run nightly smoke, the JDK must be exactly Java 1.8, was: ${java.specification.version}"> <fail message="To run nightly smoke, the JDK must be exactly Java 1.8, was: ${java.specification.version}">
<condition> <condition>
<not><equals arg1="${java.specification.version}" arg2="1.8"/></not> <not><equals arg1="${java.specification.version}" arg2="1.8"/></not>
@ -788,7 +788,7 @@ Test args: [${args}]</echo>
</antcall> </antcall>
</target> </target>
<target name="jenkins-maven-nightly" depends="-print-java-info,clean,clean-maven-build,resolve-groovy,resolve-pegdown,install-maven-tasks"> <target name="jenkins-maven-nightly" depends="-print-java-info,clean,clean-maven-build,resolve-groovy,resolve-markdown,install-maven-tasks">
<!-- step 1: build, install, deploy, and validate ANT-generated maven artifacts: --> <!-- step 1: build, install, deploy, and validate ANT-generated maven artifacts: -->
<antcall> <antcall>
<param name="is.jenkins.build" value="true"/> <param name="is.jenkins.build" value="true"/>

View File

@ -245,6 +245,9 @@ Build
* LUCENE-7726: Fix HTML entity bugs in Javadocs to be able to build with * LUCENE-7726: Fix HTML entity bugs in Javadocs to be able to build with
Java 9. (Uwe Schindler, Hossman) Java 9. (Uwe Schindler, Hossman)
* LUCENE-7727: Replace end-of-life Markdown parser "Pegdown" by "Flexmark"
for compatibility with Java 9. (Uwe Schindler)
Other Other
* LUCENE-7666: Fix typos in lucene-join package info javadoc. * LUCENE-7666: Fix typos in lucene-join package info javadoc.

View File

@ -208,7 +208,7 @@
<modules-crawl target="-ecj-javadoc-lint"/> <modules-crawl target="-ecj-javadoc-lint"/>
</target> </target>
<target name="process-webpages" depends="resolve-pegdown"> <target name="process-webpages" depends="resolve-markdown">
<makeurl property="process-webpages.buildfiles" separator="|"> <makeurl property="process-webpages.buildfiles" separator="|">
<fileset dir="." includes="**/build.xml" excludes="build.xml,analysis/*,build/**,tools/**,site/**"/> <fileset dir="." includes="**/build.xml" excludes="build.xml,analysis/*,build/**,tools/**,site/**"/>
</makeurl> </makeurl>
@ -237,10 +237,10 @@
<param name="defaultCodec" expression="${defaultCodec}"/> <param name="defaultCodec" expression="${defaultCodec}"/>
</xslt> </xslt>
<pegdown todir="${javadoc.dir}"> <markdown todir="${javadoc.dir}">
<fileset dir="." includes="MIGRATE.txt,JRE_VERSION_MIGRATION.txt,SYSTEM_REQUIREMENTS.txt"/> <fileset dir="." includes="MIGRATE.txt,JRE_VERSION_MIGRATION.txt,SYSTEM_REQUIREMENTS.txt"/>
<globmapper from="*.txt" to="*.html"/> <globmapper from="*.txt" to="*.html"/>
</pegdown> </markdown>
<copy todir="${javadoc.dir}"> <copy todir="${javadoc.dir}">
<fileset dir="site/html"/> <fileset dir="site/html"/>

View File

@ -2410,61 +2410,74 @@ ${ant.project.name}.test.dependencies=${test.classpath.list}
</forbidden-apis> </forbidden-apis>
</target> </target>
<target name="resolve-pegdown" unless="pegdown.loaded" depends="resolve-groovy"> <target name="resolve-markdown" unless="markdown.loaded" depends="resolve-groovy">
<ivy:cachepath organisation="org.pegdown" module="pegdown" revision="1.6.0" <property name="flexmark.version" value="0.16.1"/>
inline="true" conf="default" transitive="true" pathid="pegdown.classpath"/> <ivy:cachepath transitive="true" pathid="markdown.classpath">
<groovy classpathref="pegdown.classpath"><![CDATA[ <ivy:dependency org="com.vladsch.flexmark" name="flexmark" rev="${flexmark.version}" conf="default" />
<ivy:dependency org="com.vladsch.flexmark" name="flexmark-ext-autolink" rev="${flexmark.version}" conf="default" />
<ivy:dependency org="com.vladsch.flexmark" name="flexmark-ext-abbreviation" rev="${flexmark.version}" conf="default" />
</ivy:cachepath>
<groovy classpathref="markdown.classpath"><![CDATA[
import org.apache.tools.ant.AntTypeDefinition; import org.apache.tools.ant.AntTypeDefinition;
import org.apache.tools.ant.ComponentHelper; import org.apache.tools.ant.ComponentHelper;
import org.apache.tools.ant.filters.TokenFilter.ChainableReaderFilter; import org.apache.tools.ant.filters.TokenFilter.ChainableReaderFilter;
import org.pegdown.PegDownProcessor; import com.vladsch.flexmark.ast.Node;
import org.pegdown.Extensions; import com.vladsch.flexmark.ast.Heading;
import org.pegdown.FastEncoder; import com.vladsch.flexmark.html.HtmlRenderer;
import com.vladsch.flexmark.parser.Parser;
import com.vladsch.flexmark.parser.ParserEmulationProfile;
import com.vladsch.flexmark.util.html.Escaping;
import com.vladsch.flexmark.util.options.MutableDataSet;
import com.vladsch.flexmark.ext.abbreviation.AbbreviationExtension;
import com.vladsch.flexmark.ext.autolink.AutolinkExtension;
public final class PegDownFilter extends ChainableReaderFilter { public final class MarkdownFilter extends ChainableReaderFilter {
@Override @Override
public String filter(String markdownSource) { public String filter(String markdownSource) {
PegDownProcessor processor = new PegDownProcessor( MutableDataSet options = new MutableDataSet();
Extensions.ABBREVIATIONS | Extensions.AUTOLINKS | options.setFrom(ParserEmulationProfile.MARKDOWN);
Extensions.FENCED_CODE_BLOCKS | Extensions.SMARTS options.set(Parser.EXTENSIONS, [ AbbreviationExtension.create(), AutolinkExtension.create() ]);
); options.set(HtmlRenderer.RENDER_HEADER_ID, true);
options.set(HtmlRenderer.MAX_TRAILING_BLANK_LINES, 0);
Node parsed = Parser.builder(options).build().parse(markdownSource);
StringBuilder html = new StringBuilder('<html>\n<head>\n'); StringBuilder html = new StringBuilder('<html>\n<head>\n');
// match the first heading in markdown and use as title: CharSequence title = parsed.getFirstChildAny(Heading.class)?.getText();
markdownSource.find(~/(?m)^#+\s*(.+)$/) { if (title != null) {
match, title -> html.append('<title>').append(FastEncoder.encode(title)).append('</title>\n'); html.append('<title>').append(Escaping.escapeHtml(title, false)).append('</title>\n');
} }
html.append('<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">\n') html.append('<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">\n')
.append('</head>\n<body>\n') .append('</head>\n<body>\n');
.append(processor.markdownToHtml(markdownSource)) HtmlRenderer.builder(options).build().render(parsed, html);
.append('\n</body>\n</html>\n'); html.append('</body>\n</html>\n');
return html; return html;
} }
} }
AntTypeDefinition t = new AntTypeDefinition(); AntTypeDefinition t = new AntTypeDefinition();
t.setName('pegdownfilter'); t.setName('markdownfilter');
t.setClass(PegDownFilter.class); t.setClass(MarkdownFilter.class);
ComponentHelper.getComponentHelper(project).addDataTypeDefinition(t); ComponentHelper.getComponentHelper(project).addDataTypeDefinition(t);
]]></groovy> ]]></groovy>
<property name="pegdown.loaded" value="true"/> <property name="markdown.loaded" value="true"/>
</target> </target>
<!-- PEGDOWN macro: Before using depend on the target "resolve-pegdown" --> <!-- markdown macro: Before using depend on the target "resolve-markdown" -->
<macrodef name="pegdown"> <macrodef name="markdown">
<attribute name="todir"/> <attribute name="todir"/>
<attribute name="flatten" default="false"/> <attribute name="flatten" default="false"/>
<attribute name="overwrite" default="false"/> <attribute name="overwrite" default="false"/>
<element name="nested" optional="false" implicit="true"/> <element name="nested" optional="false" implicit="true"/>
<sequential> <sequential>
<copy todir="@{todir}" flatten="@{flatten}" overwrite="@{overwrite}" verbose="true" <copy todir="@{todir}" flatten="@{flatten}" overwrite="@{overwrite}" verbose="true"
preservelastmodified="false" encoding="UTF-8" taskname="pegdown" preservelastmodified="false" encoding="UTF-8" taskname="markdown"
> >
<filterchain> <filterchain>
<tokenfilter> <tokenfilter>
<filetokenizer/> <filetokenizer/>
<replaceregex pattern="\b(LUCENE|SOLR)\-\d+\b" replace="[\0](https://issues.apache.org/jira/browse/\0)" flags="gs"/> <replaceregex pattern="\b(LUCENE|SOLR)\-\d+\b" replace="[\0](https://issues.apache.org/jira/browse/\0)" flags="gs"/>
<pegdownfilter/> <markdownfilter/>
</tokenfilter> </tokenfilter>
</filterchain> </filterchain>
<nested/> <nested/>

View File

@ -202,9 +202,6 @@
<replaceregex pattern="src\s*=\s*&quot;images/" replace="src=&quot;/solr/assets/images/" flags="gs"/> <replaceregex pattern="src\s*=\s*&quot;images/" replace="src=&quot;/solr/assets/images/" flags="gs"/>
<!-- Redirect to the website's version-specific system requirements page --> <!-- Redirect to the website's version-specific system requirements page -->
<replaceregex pattern="\(SYSTEM_REQUIREMENTS.html\)" replace="(/solr/api/SYSTEM_REQUIREMENTS.html)" flags="gs"/> <replaceregex pattern="\(SYSTEM_REQUIREMENTS.html\)" replace="(/solr/api/SYSTEM_REQUIREMENTS.html)" flags="gs"/>
<!-- Remove name anchors. Unlike pegdown, the website markdown processor automatically attaches id-s to headers.
Exception: don't remove the "techproducts" anchor, because it has no following header. -->
<replaceregex pattern="&lt;a\s+name\s*=\s*&quot;(?!techproducts)[^&quot;]+&quot;\s*&gt;\s*&lt;/a&gt;\s*" replace="" flags="gs"/>
</tokenfilter> </tokenfilter>
</filterchain> </filterchain>
</copy> </copy>
@ -225,7 +222,7 @@
</copy> </copy>
</target> </target>
<target name="process-webpages" depends="define-lucene-javadoc-url,resolve-pegdown"> <target name="process-webpages" depends="define-lucene-javadoc-url,resolve-markdown">
<makeurl property="process-webpages.buildfiles" separator="|"> <makeurl property="process-webpages.buildfiles" separator="|">
<fileset dir="." includes="core/build.xml,test-framework/build.xml,solrj/build.xml,contrib/**/build.xml"/> <fileset dir="." includes="core/build.xml,test-framework/build.xml,solrj/build.xml,contrib/**/build.xml"/>
</makeurl> </makeurl>
@ -244,10 +241,10 @@
<param name="luceneJavadocUrl" expression="${lucene.javadoc.url}"/> <param name="luceneJavadocUrl" expression="${lucene.javadoc.url}"/>
</xslt> </xslt>
<pegdown todir="${javadoc.dir}"> <markdown todir="${javadoc.dir}">
<fileset dir="site" includes="**/*.mdtext"/> <fileset dir="site" includes="**/*.mdtext"/>
<globmapper from="*.mdtext" to="*.html"/> <globmapper from="*.mdtext" to="*.html"/>
</pegdown> </markdown>
<copy todir="${javadoc.dir}"> <copy todir="${javadoc.dir}">
<fileset dir="site/assets" /> <fileset dir="site/assets" />

View File

@ -263,7 +263,6 @@ Execute the following command to delete a specific document:
bin/post -c gettingstarted -d "<delete><id>SP2514N</id></delete>" bin/post -c gettingstarted -d "<delete><id>SP2514N</id></delete>"
<a name="searching"></a>
## Searching ## Searching
Solr can be queried via REST clients, cURL, wget, Chrome POSTMAN, etc., as well as via the native clients available for Solr can be queried via REST clients, cURL, wget, Chrome POSTMAN, etc., as well as via the native clients available for
@ -594,7 +593,6 @@ Here's a Unix script for convenient copying and pasting in order to run the key
bin/solr healthcheck -c gettingstarted bin/solr healthcheck -c gettingstarted
date date
<a name="cleanup"></a>
## Cleanup ## Cleanup
As you work through this guide, you may want to stop Solr and reset the environment back to the starting point. As you work through this guide, you may want to stop Solr and reset the environment back to the starting point.