fix and detect invalid html tags in javadocs

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1401947 13f79535-47bb-0310-9956-ffa450edef68
2012-10-25 01:57:46 +00:00 · 2012-10-25 01:57:46 +00:00 · feddbabf80
parent d4108c2b80
commit feddbabf80
13 changed files with 53 additions and 17 deletions
--- a/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecDocParser.java
+++ b/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecDocParser.java
@ -24,7 +24,7 @@ import java.util.Locale;
 import java.util.Map;

 /** 
- * Parser for trec doc content, invoked on doc text excluding <DOC> and <DOCNO>
+ * Parser for trec doc content, invoked on doc text excluding &lt;DOC&gt; and &lt;DOCNO&gt;
 * which are handled in TrecContentSource. Required to be stateless and hence thread safe. 
 */
 public abstract class TrecDocParser {
--- a/lucene/build.xml
+++ b/lucene/build.xml
@ -231,6 +231,13 @@

  <!-- we check for broken links across all documentation -->
  <target name="-documentation-lint" if="documentation-lint.supported" depends="documentation">
+    <echo message="checking for broken html..."/>
+    <jtidy-macro>
+       <!-- NOTE: must currently exclude deprecated-list due to a javadocs bug (as of 1.7.0_09)
+            javadocs generates invalid XML if you deprecate a method that takes a parameter
+            with a generic type -->
+      <fileset dir="build/docs" includes="**/*.html" excludes="**/deprecated-list.html"/>
+    </jtidy-macro>
    <echo message="Checking for broken links..."/>
    <check-broken-links dir="build/docs"/>
    <echo message="Checking for missing docs..."/>
--- a/lucene/common-build.xml
+++ b/lucene/common-build.xml
@ -1565,6 +1565,25 @@ ${tests-output}/junit4-*.suites     - per-JVM executed suites
    </sequential>
  </macrodef>

+  <!-- TODO: if we make a custom ant task, we can give better
+       errors and stuff here, and not make a stupid temp dir -->
+  <macrodef name="jtidy-macro">
+    <element name="nested" implicit="yes" optional="yes"/>
+    <sequential>
+      <ivy:cachepath organisation="net.sf.jtidy" module="jtidy" revision="r938"
+          log="download-only" inline="true" conf="master" type="jar" pathid="jtidy.classpath" />
+      <taskdef name="tidy" classname="org.w3c.tidy.ant.JTidyTask" classpathref="jtidy.classpath"/>
+      <delete dir="${common.dir}/build/jtidy_tmp" quiet="true"/>
+      <echo message="Checking for broken html (such as invalid tags)..." taskname="jtidy"/>
+      <tidy failonerror="true" destdir="${common.dir}/build/jtidy_tmp">
+         <nested/>
+         <parameter name="input-encoding" value="UTF-8" />
+         <parameter name="only-errors" value="true" />
+      </tidy>
+      <delete dir="${common.dir}/build/jtidy_tmp" quiet="true"/>
+    </sequential>
+  </macrodef>
+
  <property name="failonjavadocwarning" value="true"/>
  <macrodef name="invoke-javadoc">
    <element name="sources" optional="yes"/>
--- a/lucene/core/src/java/org/apache/lucene/index/IndexableField.java
+++ b/lucene/core/src/java/org/apache/lucene/index/IndexableField.java
@ -31,7 +31,7 @@ import org.apache.lucene.util.BytesRef;
 // TODO: we need to break out separate StoredField...

 /** Represents a single field for indexing.  IndexWriter
- *  consumes Iterable<IndexableField> as a document.
+ *  consumes Iterable&lt;IndexableField&gt; as a document.
 *
 *  @lucene.experimental */

--- a/lucene/facet/src/java/org/apache/lucene/facet/index/OrdinalMappingAtomicReader.java
+++ b/lucene/facet/src/java/org/apache/lucene/facet/index/OrdinalMappingAtomicReader.java
@ -59,7 +59,7 @@ import org.apache.lucene.util.encoding.IntEncoder;
 * DirectoryReader reader = DirectoryReader.open(oldDir);
 * IndexWriterConfig conf = new IndexWriterConfig(VER, ANALYZER);
 * IndexWriter writer = new IndexWriter(newDir, conf);
- * List<AtomicReaderContext> leaves = reader.leaves();
+ * List&lt;AtomicReaderContext&gt; leaves = reader.leaves();
 *   AtomicReader wrappedLeaves[] = new AtomicReader[leaves.size()];
 *   for (int i = 0; i < leaves.size(); i++) {
 *     wrappedLeaves[i] = new OrdinalMappingAtomicReader(leaves.get(i).reader(), ordmap);
--- a/lucene/facet/src/java/org/apache/lucene/facet/index/streaming/CategoryAttributesStream.java
+++ b/lucene/facet/src/java/org/apache/lucene/facet/index/streaming/CategoryAttributesStream.java
@ -32,7 +32,7 @@ import org.apache.lucene.facet.index.attributes.CategoryAttribute;
 * <P>
 * A CategoryAttributesStream object can be reused for producing more than one
 * stream. To do that, the user should cause the underlying
- * Iterable<CategoryAttribute> object to return a new set of categories, and
+ * Iterable&lt;CategoryAttribute&gt; object to return a new set of categories, and
 * then call {@link #reset()} to allow this stream to be used again.
 * 
 * @lucene.experimental
--- a/solr/build.xml
+++ b/solr/build.xml
@ -526,6 +526,12 @@
  <!-- TODO: does solr have any other docs we should check? -->
  <!-- TODO: also integrate checkJavaDocs.py, which does more checks -->
  <target name="-documentation-lint" if="documentation-lint.supported" depends="documentation">
+    <jtidy-macro>
+       <!-- NOTE: must currently exclude deprecated-list due to a javadocs bug (as of 1.7.0_09)
+            javadocs generates invalid XML if you deprecate a method that takes a parameter
+            with a generic type -->
+      <fileset dir="build/docs" includes="**/*.html" excludes="**/deprecated-list.html"/>
+    </jtidy-macro>
    <echo message="Checking for broken links..."/>
    <check-broken-links dir="${javadoc.dir}"/>
    <echo message="Checking for malformed docs..."/>
--- a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/Context.java
+++ b/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/Context.java
@ -87,7 +87,7 @@ public abstract class Context {

  /**
   * Returns the VariableResolver used in this entity which can be used to
-   * resolve the tokens in ${<namespce.name>}
+   * resolve the tokens in ${&lt;namespce.name&gt;}
   *
   * @return a VariableResolver instance
   * @see org.apache.solr.handler.dataimport.VariableResolver
--- a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/FieldReaderDataSource.java
+++ b/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/FieldReaderDataSource.java
@ -32,7 +32,7 @@ import java.util.Properties;
 * <p/>
 * The datasouce may be configured as follows
 * <p/>
- * <datasource name="f1" type="FieldReaderDataSource" />
+ * &lt;datasource name="f1" type="FieldReaderDataSource" /&gt;
 * <p/>
 * The enity which uses this datasource must keep the url value as the variable name url="field-name"
 * <p/>
--- a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/XPathRecordReader.java
+++ b/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/XPathRecordReader.java
@ -45,7 +45,7 @@ import org.slf4j.LoggerFactory;
 * /a/b/c
 * </pre>
 * A record is a Map&lt;String,Object&gt; . The key is the provided name
- * and the value is a String or a List<String>
+ * and the value is a String or a List&lt;String&gt;
 *
 * This class is thread-safe for parsing xml. But adding fields is not
 * thread-safe. The recommended usage is to addField() in one thread and 
@ -651,10 +651,10 @@ public class XPathRecordReader {
    /**
     * @param record The record map. The key is the field name as provided in 
     * the addField() methods. The value can be a single String (for single 
-     * valued fields) or a List<String> (for multiValued).
+     * valued fields) or a List&lt;String&gt; (for multiValued).
     * @param xpath The forEach XPATH for which this record is being emitted
     * If there is any change all parsing will be aborted and the Exception
-     * is propogated up
+     * is propagated up
     */
    public void handle(Map<String, Object> record, String xpath);
  }
--- a/solr/contrib/extraction/src/java/org/apache/solr/handler/extraction/ExtractingParams.java
+++ b/solr/contrib/extraction/src/java/org/apache/solr/handler/extraction/ExtractingParams.java
@ -159,7 +159,7 @@ public interface ExtractingParams {
   * File format is Java properties format with one key=value per line.
   * The key is evaluated as a regex against the file name, and the value is the password
   * The rules are evaluated top-bottom, i.e. the first match will be used
-   * If you want a fallback password to be always used, supply a .*=<defaultmypassword> at the end  
+   * If you want a fallback password to be always used, supply a .*=&lt;defaultmypassword&gt; at the end  
   */
  public static final String PASSWORD_MAP_FILE = "passwordsFile";
 }
--- a/solr/core/src/java/org/apache/solr/search/SurroundQParserPlugin.java
+++ b/solr/core/src/java/org/apache/solr/search/SurroundQParserPlugin.java
@ -34,8 +34,8 @@ import org.slf4j.LoggerFactory;
 * Plugin for lucene/contrib Surround query parser, bringing SpanQuery support
 * to Solr
 * 
- * <queryParser name="surround"
- * class="org.apache.solr.search.SurroundQParserPlugin" />
+ * &lt;queryParser name="surround"
+ * class="org.apache.solr.search.SurroundQParserPlugin" /&gt;
 * 
 * Examples of query syntax can be found in lucene/queryparser/docs/surround
 * 
--- a/solr/core/src/java/org/apache/solr/servlet/SolrDispatchFilter.java
+++ b/solr/core/src/java/org/apache/solr/servlet/SolrDispatchFilter.java
@ -493,20 +493,24 @@ public class SolrDispatchFilter implements Filter
   * filter into a larger web application.
   *
   * For example, if web.xml specifies:
-   *
+   * <pre class="prettyprint">
+   * {@code
   * <filter-mapping>
   *  <filter-name>SolrRequestFilter</filter-name>
   *  <url-pattern>/xxx/*</url-pattern>
-   * </filter-mapping>
+   * </filter-mapping>}
+   * </pre>
   *
   * Make sure to set the PathPrefix to "/xxx" either with this function
   * or in web.xml.
   *
+   * <pre class="prettyprint">
+   * {@code
   * <init-param>
   *  <param-name>path-prefix</param-name>
   *  <param-value>/xxx</param-value>
-   * </init-param>
-   *
+   * </init-param>}
+   * </pre>
   */
  public void setPathPrefix(String pathPrefix) {
    this.pathPrefix = pathPrefix;