Updated the Javadoc

git-svn-id: https://svn.apache.org/repos/asf/commons/sandbox/csv/trunk@1297043 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Emmanuel Bourg 2012-03-05 13:08:19 +00:00
parent 312f5b033e
commit 7bd9d1d970
3 changed files with 41 additions and 50 deletions

View File

@ -46,7 +46,7 @@ public class CSVFormat implements Cloneable, Serializable {
*/
public static final char DISABLED = '\ufffe';
/** Standard comma separated format as defined by RFC 4180. */
/** Standard comma separated format as defined by <a href="http://tools.ietf.org/html/rfc4180">RFC 4180</a>. */
public static final CSVFormat DEFAULT = new CSVFormat(',', '"', DISABLED, DISABLED, true, true, false, true);
/** Excel file format (using a comma as the value delimiter). */
@ -57,26 +57,33 @@ public class CSVFormat implements Cloneable, Serializable {
/**
* Creates a CSVFormat with the default parameters.
* Creates a CSV format with the default parameters.
*/
public CSVFormat() {
// Intentionally empty: this constructor assigns no fields itself.
}
/**
* Creates a customized CSV format.
* <p/>
* Only the three given characters are customizable here; this constructor delegates to the
* full constructor with {@link #DISABLED} as the fourth argument and the flag values
* <tt>true</tt>, <tt>true</tt>, <tt>false</tt>, <tt>true</tt>, which are the same trailing
* values used to build {@link #DEFAULT}.
* NOTE(review): going by the full constructor's documented parameter order, these presumably
* map to escape, leadingSpacesIgnored, trailingSpacesIgnored, unicodeEscapesInterpreted and
* emptyLinesIgnored - confirm against its signature.
*
* @param delimiter the char used for value separation
* @param encapsulator the char used as value encapsulation marker
* @param commentStart the char used for comment identification
*/
public CSVFormat(char delimiter, char encapsulator, char commentStart) {
this(delimiter, encapsulator, commentStart, DISABLED, true, true, false, true);
}
/**
* Customized CSV format constructor.
* Creates a customized CSV format.
*
* @param delimiter a char used for value separation
* @param encapsulator a char used as value encapsulation marker
* @param commentStart a char used for comment identification
* @param escape a char used to escape special characters in values
* @param leadingSpacesIgnored TRUE when leading whitespaces should be ignored
* @param trailingSpacesIgnored TRUE when trailing whitespaces should be ignored
* @param unicodeEscapesInterpreted TRUE when unicode escapes should be interpreted
* @param emptyLinesIgnored TRUE when the parser should skip empty lines
* @param delimiter the char used for value separation
* @param encapsulator the char used as value encapsulation marker
* @param commentStart the char used for comment identification
* @param escape the char used to escape special characters in values
* @param leadingSpacesIgnored <tt>true</tt> when leading whitespaces should be ignored
* @param trailingSpacesIgnored <tt>true</tt> when trailing whitespaces should be ignored
* @param unicodeEscapesInterpreted <tt>true</tt> when unicode escapes should be interpreted
* @param emptyLinesIgnored <tt>true</tt> when the parser should skip empty lines
*/
public CSVFormat(
char delimiter,

View File

@ -64,15 +64,14 @@ public class CSVParser implements Iterable<String[]> {
/** Immutable empty String array. */
private static final String[] EMPTY_STRING_ARRAY = new String[0];
// the input stream
/** The input stream */
private final ExtendedBufferedReader in;
private final CSVFormat format;
// the following objects are shared to reduce garbage
/**
* A record buffer for getLine(). Grows as necessary and is reused.
*/
/** A record buffer for getLine(). Grows as necessary and is reused. */
private final List<String> record = new ArrayList<String>();
private final Token reusableToken = new Token();
private final CharBuffer wsBuf = new CharBuffer();
@ -146,12 +145,10 @@ public class CSVParser implements Iterable<String[]> {
// ======================================================
/**
* Parses the CSV according to the given format
* and returns the content as an array of records
* (whereas records are arrays of single values).
* Parses the CSV according to the given format and returns the content
* as an array of records (where each record is an array of single values).
* <p/>
* The returned content starts at the current parse-position in
* the stream.
* The returned content starts at the current parse-position in the stream.
*
* @return matrix of records x values ('null' when end of file)
* @throws IOException on parse error or input read-failure
@ -171,11 +168,9 @@ public class CSVParser implements Iterable<String[]> {
}
/**
* Parses from the current point in the stream til
* the end of the current line.
* Parses from the current point in the stream until the end of the current line.
*
* @return array of values til end of line
* ('null' when end of file has been reached)
* @return array of values til end of line ('null' when end of file has been reached)
* @throws IOException on parse error or input read-failure
*/
String[] getLine() throws IOException {
@ -209,7 +204,7 @@ public class CSVParser implements Iterable<String[]> {
}
}
if (!record.isEmpty()) {
ret = (String[]) record.toArray(new String[record.size()]);
ret = record.toArray(new String[record.size()]);
}
return ret;
}
@ -283,11 +278,9 @@ public class CSVParser implements Iterable<String[]> {
/**
* Returns the next token.
* <p/>
* A token corresponds to a term, a record change or an
* end-of-file indicator.
* A token corresponds to a term, a record change or an end-of-file indicator.
*
* @param tkn an existing Token object to reuse. The caller is responsible to initialize the
* Token.
* @param tkn an existing Token object to reuse. The caller is responsible to initialize the Token.
* @return the next token found
* @throws IOException on stream access error
*/
@ -380,9 +373,9 @@ public class CSVParser implements Iterable<String[]> {
* A simple token might contain escaped delimiters (as \, or \;). The
* token is finished when one of the following conditions becomes true:
* <ul>
* <li>end of line has been reached (EORECORD)</li>
* <li>end of stream has been reached (EOF)</li>
* <li>an unescaped delimiter has been reached (TOKEN)</li>
* <li>end of line has been reached (EORECORD)</li>
* <li>end of stream has been reached (EOF)</li>
* <li>an unescaped delimiter has been reached (TOKEN)</li>
* </ul>
*
* @param tkn the current token
@ -476,19 +469,13 @@ public class CSVParser implements Iterable<String[]> {
return tkn;
} else if (!isWhitespace(c)) {
// error invalid char between token and next delimiter
throw new IOException(
"(line " + getLineNumber()
+ ") invalid char between encapsulated token end delimiter"
);
throw new IOException("(line " + getLineNumber() + ") invalid char between encapsulated token and delimiter");
}
}
}
} else if (isEndOfFile(c)) {
// error condition (end of file before end of token)
throw new IOException(
"(startline " + startLineNumber + ")"
+ "eof reached before encapsulated token finished"
);
throw new IOException("(startline " + startLineNumber + ") EOF reached before encapsulated token finished");
} else {
// consume character
tkn.content.append((char) c);
@ -500,8 +487,7 @@ public class CSVParser implements Iterable<String[]> {
/**
* Decodes Unicode escapes.
* <p/>
* Interpretation of "\\uXXXX" escape sequences
* where XXXX is a hex-number.
* Interpretation of "\\uXXXX" escape sequences where XXXX is a hex-number.
*
* @param c current char which is discarded because it's the "\\" of "\\uXXXX"
* @return the decoded character
@ -555,10 +541,6 @@ public class CSVParser implements Iterable<String[]> {
return out;
}
// ======================================================
// strategies
// ======================================================
/**
* Obtain the specified CSV format.
*

View File

@ -35,7 +35,7 @@
record := values*
</pre>
<p>The following list contains the csv aspects the WAKE CSV parser supports:</p>
<p>The following list contains the csv aspects the Commons CSV parser supports:</p>
<dl>
<dt>Separators (for lines)</dt>
<dd>The record separators are hardcoded and cannot be changed. They must be '\n' or '\r\n'.</dd>
@ -76,9 +76,11 @@
<p>Example usage:</p>
<blockquote><pre>
String[] parsedLine = CSVParser.parseLine("a,b,c");
for (int i = 0; i &lt; parsedLine.length; ++i) {
System.out.println("value " + i + "=" + parsedLine[i]);
Reader in = new StringReader("a,b,c");
for (String[] line : CSVFormat.DEFAULT.parse(in)) {
for (int i = 0; i &lt; line.length; i++) {
System.out.println("value " + i + "=" + line[i]);
}
}
</pre></blockquote>
</body>