diff --git a/hbase-it/src/test/java/org/apache/hadoop/hbase/test/util/warc/WARCRecord.java b/hbase-it/src/test/java/org/apache/hadoop/hbase/test/util/warc/WARCRecord.java index e4041a1f156..3b79e5a61c0 100644 --- a/hbase-it/src/test/java/org/apache/hadoop/hbase/test/util/warc/WARCRecord.java +++ b/hbase-it/src/test/java/org/apache/hadoop/hbase/test/util/warc/WARCRecord.java @@ -49,15 +49,20 @@ import java.util.regex.Pattern; /** * Immutable implementation of a record in a WARC file. You create a {@link WARCRecord} by parsing - * it out of a {@link DataInput} stream. The file format is documented in the [ISO - * Standard](http://bibnum.bnf.fr/warc/WARC_ISO_28500_version1_latestdraft.pdf). In a nutshell, it's - * a textual format consisting of lines delimited by `\r\n`. Each record has the following - * structure: 1. A line indicating the WARC version number, such as `WARC/1.0`. 2. Several header - * lines (in key-value format, similar to HTTP or email headers), giving information about the - * record. The header is terminated by an empty line. 3. A body consisting of raw bytes (the number - * of bytes is indicated in one of the headers). 4. A final separator of `\r\n\r\n` before the next - * record starts. There are various different types of records, as documented on - * {@link Header#getRecordType()}. + * it out of a {@link DataInput} stream. + *

+ * The file format is documented in the + * <a href="http://bibnum.bnf.fr/warc/WARC_ISO_28500_version1_latestdraft.pdf">ISO Standard</a>. In + * a nutshell, it's a textual format consisting of lines delimited by `\r\n`. Each record has the + * following structure: + *

    + *
  1. A line indicating the WARC version number, such as `WARC/1.0`. + *
  2. Several header lines (in key-value format, similar to HTTP or email headers), giving + * information about the record. The header is terminated by an empty line. + *
  3. A body consisting of raw bytes (the number of bytes is indicated in one of the headers). + *
  4. A final separator of `\r\n\r\n` before the next record starts. + *
+ * There are various different types of records, as documented on {@link Header#getRecordType()}. */ public class WARCRecord { @@ -176,9 +181,11 @@ public class WARCRecord { /** * Contains the parsed headers of a {@link WARCRecord}. Each record contains a number of headers * in key-value format, where some header keys are standardised, but nonstandard ones can be - * added. The documentation of the methods in this class is excerpted from the [WARC 1.0 - * specification](http://bibnum.bnf.fr/warc/WARC_ISO_28500_version1_latestdraft.pdf). Please see - * the specification for more detail. + * added. + *

+ * The documentation of the methods in this class is excerpted from the + * WARC 1.0 + * specification. Please see the specification for more detail. */ public final static class Header { private final Map fields; @@ -190,56 +197,69 @@ public class WARCRecord { /** * Returns the type of WARC record (the value of the `WARC-Type` header field). WARC 1.0 defines * the following record types: (for full definitions, see the - * [spec](http://bibnum.bnf.fr/warc/WARC_ISO_28500_version1_latestdraft.pdf)) * `warcinfo`: - * Describes the records that follow it, up through end of file, end of input, or until next - * `warcinfo` record. Typically, this appears once and at the beginning of a WARC file. For a - * web archive, it often contains information about the web crawl which generated the following - * records. The format of this descriptive record block may vary, though the use of the - * `"application/warc-fields"` content-type is recommended. (...) * `response`: The record - * should contain a complete scheme-specific response, including network protocol information - * where possible. For a target-URI of the `http` or `https` schemes, a `response` record block - * should contain the full HTTP response received over the network, including headers. That is, - * it contains the 'Response' message defined by section 6 of HTTP/1.1 (RFC2616). The WARC - * record's Content-Type field should contain the value defined by HTTP/1.1, + * spec. + *

* @return The record's `WARC-Type` header field, as a string. */ public String getRecordType() { @@ -272,8 +292,10 @@ public class WARCRecord { * The MIME type (RFC2045) of the information contained in the record's block. For example, in * HTTP request and response records, this would be `application/http` as per section 19.1 of * RFC2616 (or `application/http; msgtype=request` and `application/http; msgtype=response` - * respectively). In particular, the content-type is *not* the value of the HTTP Content-Type - * header in an HTTP response, but a MIME type to describe the full archived HTTP message (hence + * respectively). + *

+ * In particular, the content-type is *not* the value of the HTTP Content-Type header in an HTTP + * response, but a MIME type to describe the full archived HTTP message (hence * `application/http` if the block contains request or response headers). * @return The record's `Content-Type` header field, as a string. */