[CSV-182] Allow some printing operations directly from CSVFormat. Adds APIs to CSVFormat, so update the version from 1.3.1-SNAPSHOT to 1.4-SNAPSHOT a la semver. This commit refactors a small bit of guts code from CSVPrinter to CSVFormat.

git-svn-id: https://svn.apache.org/repos/asf/commons/proper/csv/trunk@1743430 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Gary D. Gregory 2016-05-11 21:09:48 +00:00
parent 21cb8b4750
commit 4a6af45908
4 changed files with 267 additions and 212 deletions

View File

@ -24,7 +24,7 @@ limitations under the License.
</parent>
<groupId>org.apache.commons</groupId>
<artifactId>commons-csv</artifactId>
<version>1.3.1-SNAPSHOT</version>
<version>1.4-SNAPSHOT</version>
<name>Apache Commons CSV</name>
<url>http://commons.apache.org/proper/commons-csv/</url>
<description>

View File

@ -38,8 +38,9 @@
<title>Release Notes</title>
</properties>
<body>
<release version="1.3.1" date="2016-MM-DD" description="Feature and bug fix release">
<release version="1.4" date="2016-MM-DD" description="Feature and bug fix release">
<action issue="CSV-181" type="update" dev="ggregory" due-to="Gary Gregory">Make CSVPrinter.print(Object) GC-free.</action>
<action issue="CSV-182" type="update" dev="ggregory" due-to="Gary Gregory">Allow some printing operations directly from CSVFormat.</action>
</release>
<release version="1.3" date="2016-05-09" description="Feature and bug fix release">
<action issue="CSV-179" type="add" dev="britter">Add shortcut method for using first record as header to CSVFormat</action>

View File

@ -19,11 +19,13 @@ package org.apache.commons.csv;
import static org.apache.commons.csv.Constants.BACKSLASH;
import static org.apache.commons.csv.Constants.COMMA;
import static org.apache.commons.csv.Constants.COMMENT;
import static org.apache.commons.csv.Constants.CR;
import static org.apache.commons.csv.Constants.CRLF;
import static org.apache.commons.csv.Constants.DOUBLE_QUOTE_CHAR;
import static org.apache.commons.csv.Constants.LF;
import static org.apache.commons.csv.Constants.PIPE;
import static org.apache.commons.csv.Constants.SP;
import static org.apache.commons.csv.Constants.TAB;
import java.io.IOException;
@ -499,7 +501,8 @@ public final class CSVFormat implements Serializable {
* TODO
* @param trim
* TODO
* @param trailingDelimiter TODO
* @param trailingDelimiter
* TODO
* @throws IllegalArgumentException
* if the delimiter is a line break character
*/
@ -860,6 +863,242 @@ public final class CSVFormat implements Serializable {
return new CSVPrinter(out, this);
}
/**
 * Writes one value to {@code out} as the next field of the current record, escaping or encapsulating it as
 * needed. Useful when one wants to avoid creating CSVPrinters.
 *
 * <p>
 * A {@code null} value is written as this format's null string, or as {@code Constants.EMPTY} when no null string
 * is set. A {@link CharSequence} is used as is; any other object is converted with {@code toString()}. The
 * resulting text is trimmed first when {@link #getTrim()} is {@code true}.
 * </p>
 *
 * @param value
 *            value to be output.
 * @param out
 *            where to print the value
 * @param newRecord
 *            {@code true} if this value starts a new record
 * @throws IOException
 *             If an I/O error occurs
 * @since 1.4
 */
public void print(final Object value, final Appendable out, final boolean newRecord) throws IOException {
    // Resolve the text to write. toString() is only invoked on values that are not already a CharSequence,
    // which helps GC-free use cases.
    final CharSequence raw;
    if (value instanceof CharSequence) {
        raw = (CharSequence) value;
    } else if (value != null) {
        raw = value.toString();
    } else if (nullString != null) {
        // null values are replaced by the configured null string...
        raw = nullString;
    } else {
        // ...or considered empty when none is configured
        raw = Constants.EMPTY;
    }
    final CharSequence text;
    if (getTrim()) {
        text = trim(raw);
    } else {
        text = raw;
    }
    // The original object is passed along as well so that quoting can check for Number.
    print(value, text, 0, text.length(), out, newRecord);
}
/*
 * Writes the delimiter (unless this is the first value of a record) followed by the given range of the value,
 * quoted or escaped according to this format.
 */
private void print(final Object object, final CharSequence value, final int offset, final int len,
        final Appendable out, final boolean newRecord) throws IOException {
    final boolean needsDelimiter = !newRecord;
    if (needsDelimiter) {
        out.append(getDelimiter());
    }
    if (object == null) {
        // a null object is written as the substituted text, without quoting or escaping
        out.append(value);
        return;
    }
    if (isQuoteCharacterSet()) {
        // the original object is needed so can check for Number
        printAndQuote(object, value, offset, len, out, newRecord);
        return;
    }
    if (isEscapeCharacterSet()) {
        printAndEscape(value, offset, len, out);
        return;
    }
    // neither quoting nor escaping is configured: write the range as is
    out.append(value, offset, offset + len);
}
/*
 * Writes the given range of the value, putting the escape character in front of every CR, LF, delimiter and
 * escape character. CR and LF are written as the escape character followed by 'r' and 'n' respectively.
 *
 * Note: must only be called if escaping is enabled, otherwise will generate NPE
 */
private void printAndEscape(final CharSequence value, final int offset, final int len, final Appendable out)
        throws IOException {
    final int end = offset + len;
    final char delimiter = getDelimiter();
    final char escapeChar = getEscapeCharacter().charValue();

    // index of the first character that has not been written yet
    int pending = offset;
    for (int index = offset; index < end; index++) {
        final char current = value.charAt(index);
        final boolean special = current == CR || current == LF || current == delimiter || current == escapeChar;
        if (!special) {
            continue;
        }
        // flush the untouched segment that precedes this character
        if (index > pending) {
            out.append(value, pending, index);
        }
        out.append(escapeChar);
        out.append(current == LF ? 'n' : current == CR ? 'r' : current);
        // the next segment starts right after the character just escaped
        pending = index + 1;
    }

    // flush whatever follows the last escaped character
    if (end > pending) {
        out.append(value, pending, end);
    }
}
/*
 * Writes the given range of the value, wrapping it in the quote character when the quote mode asks for it and
 * doubling every quote character found inside a quoted value.
 *
 * Note: must only be called if quoting is enabled, otherwise will generate NPE
 */
// the original object is needed so can check for Number
private void printAndQuote(final Object object, final CharSequence value, final int offset, final int len,
        final Appendable out, final boolean newRecord) throws IOException {
    final int end = offset + len;
    final char delimiter = getDelimiter();
    final char quoteChar = getQuoteCharacter().charValue();
    final QuoteMode configuredMode = getQuoteMode();
    // a missing quote mode is treated as MINIMAL
    final QuoteMode mode = configuredMode != null ? configuredMode : QuoteMode.MINIMAL;

    boolean quote = false;
    int segmentStart = offset;
    int pos = offset;

    switch (mode) {
    case ALL:
        quote = true;
        break;
    case NON_NUMERIC:
        quote = !(object instanceof Number);
        break;
    case NONE:
        // Use the existing escaping code
        printAndEscape(value, offset, len, out);
        return;
    case MINIMAL:
        if (len <= 0) {
            // always quote an empty token that is the first
            // on the line, as it may be the only thing on the
            // line. If it were not quoted in that case,
            // an empty line has no tokens.
            quote = newRecord;
        } else {
            final char first = value.charAt(pos);
            final boolean alphanumeric = first >= '0' && first <= '9' || first >= 'A' && first <= 'Z'
                    || first >= 'a' && first <= 'z';
            // TODO where did this rule come from?
            if (newRecord && !alphanumeric) {
                quote = true;
            } else if (first <= COMMENT) {
                // Some other chars at the start of a value caused the parser to fail, so for now
                // encapsulate if we start in anything less than '#'. We are being conservative
                // by including the default comment char too.
                quote = true;
            } else {
                // look for the first character that forces encapsulation and leave pos on it
                while (pos < end) {
                    final char current = value.charAt(pos);
                    if (current == LF || current == CR || current == quoteChar || current == delimiter) {
                        quote = true;
                        break;
                    }
                    pos++;
                }
                if (!quote) {
                    pos = end - 1;
                    // Some other chars at the end caused the parser to fail, so for now
                    // encapsulate if we end in anything less than ' '
                    if (value.charAt(pos) <= SP) {
                        quote = true;
                    }
                }
            }
        }
        break;
    default:
        throw new IllegalStateException("Unexpected Quote value: " + mode);
    }

    if (!quote) {
        // no encapsulation needed - write out the original value
        out.append(value, segmentStart, end);
        return;
    }

    // we hit something that needed encapsulation
    out.append(quoteChar);

    // Pick up where we left off: pos should be positioned on the first character that caused
    // the need for encapsulation.
    for (; pos < end; pos++) {
        if (value.charAt(pos) == quoteChar) {
            // write out the chunk up until this point, including the encapsulator itself
            out.append(value, segmentStart, pos + 1);
            // put the next starting position on the encapsulator so we will
            // write it out again with the next string (effectively doubling it)
            segmentStart = pos;
        }
    }

    // write the last segment
    out.append(value, segmentStart, pos);
    out.append(quoteChar);
}
/**
 * Ends the current record: writes the delimiter once more when {@link #getTrailingDelimiter()} is {@code true},
 * then the record separator if one is set.
 *
 * @param out
 *            where to write
 *
 * @throws IOException
 *             If an I/O error occurs
 * @since 1.4
 */
public void println(final Appendable out) throws IOException {
    final boolean closeWithDelimiter = getTrailingDelimiter();
    if (closeWithDelimiter) {
        out.append(getDelimiter());
    }
    if (recordSeparator == null) {
        // no record separator configured: nothing more to write
        return;
    }
    out.append(recordSeparator);
}
/**
 * Prints the given values as a single record of delimiter separated values followed by the record separator.
 *
 * <p>
 * Each value is written with {@link #print(Object, Appendable, boolean)}, the first one as the start of a new
 * record. This method adds the record separator to the output after printing the record, so there is no need to
 * call {@link #println(Appendable)}.
 * </p>
 *
 * @param out
 *            where to write
 * @param values
 *            values to output.
 * @throws IOException
 *             If an I/O error occurs
 * @since 1.4
 */
public void printRecord(final Appendable out, final Object... values) throws IOException {
    // only the first value of the record is printed without a leading delimiter
    boolean first = true;
    for (final Object value : values) {
        print(value, out, first);
        first = false;
    }
    println(out);
}
@Override
public String toString() {
final StringBuilder sb = new StringBuilder();
@ -917,6 +1156,23 @@ public final class CSVFormat implements Serializable {
return strings;
}
/*
 * Returns the given sequence without leading and trailing characters that are <= ' '. Strings are handed to
 * String.trim(); for other sequences a sub-sequence is only created when something has to be removed.
 */
private CharSequence trim(final CharSequence charSequence) {
    if (charSequence instanceof String) {
        final String text = (String) charSequence;
        return text.trim();
    }
    final int length = charSequence.length();
    // skip leading characters <= ' '
    int begin = 0;
    while (begin < length && charSequence.charAt(begin) <= ' ') {
        begin++;
    }
    // skip trailing characters <= ' ', never moving past begin
    int finish = length;
    while (begin < finish && charSequence.charAt(finish - 1) <= ' ') {
        finish--;
    }
    if (begin == 0 && finish == length) {
        // nothing to remove: hand back the same instance
        return charSequence;
    }
    return charSequence.subSequence(begin, finish);
}
/**
* Verifies the consistency of the parameters and throws an IllegalArgumentException if necessary.
*
@ -1083,6 +1339,7 @@ public final class CSVFormat implements Serializable {
* <p>
* Calling this method is equivalent to calling:
* </p>
*
* <pre>
* CSVFormat format = aFormat.withHeader().withSkipHeaderRecord();
* </pre>
@ -1114,8 +1371,8 @@ public final class CSVFormat implements Serializable {
* </p>
*
* @param headerEnum
* the enum defining the header, {@code null} if disabled, empty if parsed automatically, user
* specified otherwise.
* the enum defining the header, {@code null} if disabled, empty if parsed automatically, user specified
* otherwise.
*
* @return A new CSVFormat that is equal to this but with the specified header
* @see #withHeader(String...)

View File

@ -17,7 +17,6 @@
package org.apache.commons.csv;
import static org.apache.commons.csv.Constants.COMMENT;
import static org.apache.commons.csv.Constants.CR;
import static org.apache.commons.csv.Constants.LF;
import static org.apache.commons.csv.Constants.SP;
@ -120,204 +119,10 @@ public final class CSVPrinter implements Flushable, Closeable {
* If an I/O error occurs
*/
public void print(final Object value) throws IOException {
    // Resolve the text to write. toString() is only invoked on values that are not already a CharSequence,
    // which helps GC-free use cases.
    final CharSequence raw;
    if (value instanceof CharSequence) {
        raw = (CharSequence) value;
    } else if (value != null) {
        raw = value.toString();
    } else {
        // null values are replaced by the format's null string, or considered empty when none is set
        final String nullString = format.getNullString();
        raw = nullString != null ? nullString : Constants.EMPTY;
    }
    final CharSequence text;
    if (format.getTrim()) {
        text = trim(raw);
    } else {
        text = raw;
    }
    // The original object is passed along as well so that quoting can check for Number.
    print(value, text, 0, text.length());
}
/*
 * Returns the given sequence without leading and trailing characters that are <= ' '. Strings are handed to
 * String.trim(); for other sequences a sub-sequence is only created when something has to be removed.
 */
private CharSequence trim(final CharSequence charSequence) {
    if (charSequence instanceof String) {
        final String text = (String) charSequence;
        return text.trim();
    }
    final int length = charSequence.length();
    // skip leading characters <= ' '
    int begin = 0;
    while (begin < length && charSequence.charAt(begin) <= ' ') {
        begin++;
    }
    // skip trailing characters <= ' ', never moving past begin
    int finish = length;
    while (begin < finish && charSequence.charAt(finish - 1) <= ' ') {
        finish--;
    }
    if (begin == 0 && finish == length) {
        // nothing to remove: hand back the same instance
        return charSequence;
    }
    return charSequence.subSequence(begin, finish);
}
/*
 * Writes the delimiter (unless this is the first value of the record) followed by the given range of the value,
 * quoted or escaped according to the format, then marks the record as started.
 */
private void print(final Object object, final CharSequence value, final int offset, final int len)
        throws IOException {
    if (!newRecord) {
        out.append(format.getDelimiter());
    }
    if (object == null) {
        // a null object is written as the substituted text, without quoting or escaping
        out.append(value);
    } else if (format.isQuoteCharacterSet()) {
        // the original object is needed so can check for Number
        printAndQuote(object, value, offset, len);
    } else if (format.isEscapeCharacterSet()) {
        printAndEscape(value, offset, len);
    } else {
        out.append(value, offset, offset + len);
    }
    // The value has been written exactly once above. It must not additionally be handed to
    // format.print(value, out, newRecord): that would emit the field a second time, and would do so from the
    // already converted CharSequence, i.e. without the original object needed for the null and Number checks.
    newRecord = false;
}
/*
 * Writes the given range of the value, putting the escape character in front of every CR, LF, delimiter and
 * escape character. CR and LF are written as the escape character followed by 'r' and 'n' respectively.
 *
 * Note: must only be called if escaping is enabled, otherwise will generate NPE
 */
private void printAndEscape(final CharSequence value, final int offset, final int len) throws IOException {
    final int end = offset + len;
    final char delimiter = format.getDelimiter();
    final char escapeChar = format.getEscapeCharacter().charValue();

    // index of the first character that has not been written yet
    int pending = offset;
    for (int index = offset; index < end; index++) {
        final char current = value.charAt(index);
        final boolean special = current == CR || current == LF || current == delimiter || current == escapeChar;
        if (!special) {
            continue;
        }
        // flush the untouched segment that precedes this character
        if (index > pending) {
            out.append(value, pending, index);
        }
        out.append(escapeChar);
        out.append(current == LF ? 'n' : current == CR ? 'r' : current);
        // the next segment starts right after the character just escaped
        pending = index + 1;
    }

    // flush whatever follows the last escaped character
    if (end > pending) {
        out.append(value, pending, end);
    }
}
/*
 * Writes the given range of the value, wrapping it in the quote character when the quote mode asks for it and
 * doubling every quote character found inside a quoted value.
 *
 * Note: must only be called if quoting is enabled, otherwise will generate NPE
 */
// the original object is needed so can check for Number
private void printAndQuote(final Object object, final CharSequence value, final int offset, final int len)
        throws IOException {
    final int end = offset + len;
    final char delimiter = format.getDelimiter();
    final char quoteChar = format.getQuoteCharacter().charValue();
    final QuoteMode configuredMode = format.getQuoteMode();
    // a missing quote mode is treated as MINIMAL
    final QuoteMode mode = configuredMode != null ? configuredMode : QuoteMode.MINIMAL;

    boolean quote = false;
    int segmentStart = offset;
    int pos = offset;

    switch (mode) {
    case ALL:
        quote = true;
        break;
    case NON_NUMERIC:
        quote = !(object instanceof Number);
        break;
    case NONE:
        // Use the existing escaping code
        printAndEscape(value, offset, len);
        return;
    case MINIMAL:
        if (len <= 0) {
            // always quote an empty token that is the first
            // on the line, as it may be the only thing on the
            // line. If it were not quoted in that case,
            // an empty line has no tokens.
            quote = newRecord;
        } else {
            final char first = value.charAt(pos);
            final boolean alphanumeric = first >= '0' && first <= '9' || first >= 'A' && first <= 'Z'
                    || first >= 'a' && first <= 'z';
            // TODO where did this rule come from?
            if (newRecord && !alphanumeric) {
                quote = true;
            } else if (first <= COMMENT) {
                // Some other chars at the start of a value caused the parser to fail, so for now
                // encapsulate if we start in anything less than '#'. We are being conservative
                // by including the default comment char too.
                quote = true;
            } else {
                // look for the first character that forces encapsulation and leave pos on it
                while (pos < end) {
                    final char current = value.charAt(pos);
                    if (current == LF || current == CR || current == quoteChar || current == delimiter) {
                        quote = true;
                        break;
                    }
                    pos++;
                }
                if (!quote) {
                    pos = end - 1;
                    // Some other chars at the end caused the parser to fail, so for now
                    // encapsulate if we end in anything less than ' '
                    if (value.charAt(pos) <= SP) {
                        quote = true;
                    }
                }
            }
        }
        break;
    default:
        throw new IllegalStateException("Unexpected Quote value: " + mode);
    }

    if (!quote) {
        // no encapsulation needed - write out the original value
        out.append(value, segmentStart, end);
        return;
    }

    // we hit something that needed encapsulation
    out.append(quoteChar);

    // Pick up where we left off: pos should be positioned on the first character that caused
    // the need for encapsulation.
    for (; pos < end; pos++) {
        if (value.charAt(pos) == quoteChar) {
            // write out the chunk up until this point, including the encapsulator itself
            out.append(value, segmentStart, pos + 1);
            // put the next starting position on the encapsulator so we will
            // write it out again with the next string (effectively doubling it)
            segmentStart = pos;
        }
    }

    // write the last segment
    out.append(value, segmentStart, pos);
    out.append(quoteChar);
}
/**
* Prints a comment on a new line among the delimiter separated values.
*
@ -370,13 +175,7 @@ public final class CSVPrinter implements Flushable, Closeable {
* If an I/O error occurs
*/
public void println() throws IOException {
    // CSVFormat.println(Appendable) writes the optional trailing delimiter and the record separator. Writing
    // them here as well would terminate every record twice, so the work is delegated and done only once.
    format.println(out);
    // the next value printed starts a new record
    newRecord = true;
}
@ -414,10 +213,8 @@ public final class CSVPrinter implements Flushable, Closeable {
* If an I/O error occurs
*/
public void printRecord(final Object... values) throws IOException {
    // Print through print(Object) so that this printer's newRecord state is honoured: when values were already
    // printed on the current line, the first value here must be preceded by a delimiter.
    // CSVFormat.printRecord(Appendable, Object...) always treats its first value as the start of a record, and
    // calling it in addition to this loop would write the whole record twice.
    for (final Object value : values) {
        print(value);
    }
    // println() writes the record terminator and resets newRecord for the next record
    println();
}
/**