From 4a6af4590838e8804290464a8d88a837acb2fa08 Mon Sep 17 00:00:00 2001 From: "Gary D. Gregory" Date: Wed, 11 May 2016 21:09:48 +0000 Subject: [PATCH] [CSV-182] Allow some printing operations directly from CSVFormat. Adds APIs to CSVFormat, so update the version from 1.3.1-SNAPSHOT to 1.4-SNAPSHOT a la semver. This commit refactors a small bit of internal code from CSVPrinter to CSVFormat. git-svn-id: https://svn.apache.org/repos/asf/commons/proper/csv/trunk@1743430 13f79535-47bb-0310-9956-ffa450edef68 --- pom.xml | 2 +- src/changes/changes.xml | 3 +- .../org/apache/commons/csv/CSVFormat.java | 263 +++++++++++++++++- .../org/apache/commons/csv/CSVPrinter.java | 211 +------------- 4 files changed, 267 insertions(+), 212 deletions(-) diff --git a/pom.xml b/pom.xml index 3e35f476..21b08477 100644 --- a/pom.xml +++ b/pom.xml @@ -24,7 +24,7 @@ limitations under the License. org.apache.commons commons-csv - 1.3.1-SNAPSHOT + 1.4-SNAPSHOT Apache Commons CSV http://commons.apache.org/proper/commons-csv/ diff --git a/src/changes/changes.xml b/src/changes/changes.xml index c9e0e3cd..601d54a2 100644 --- a/src/changes/changes.xml +++ b/src/changes/changes.xml @@ -38,8 +38,9 @@ Release Notes - + Make CSVPrinter.print(Object) GC-free. + Allow some printing operations directly from CSVFormat.
Add shortcut method for using first record as header to CSVFormat diff --git a/src/main/java/org/apache/commons/csv/CSVFormat.java b/src/main/java/org/apache/commons/csv/CSVFormat.java index 14d24106..e19a997b 100644 --- a/src/main/java/org/apache/commons/csv/CSVFormat.java +++ b/src/main/java/org/apache/commons/csv/CSVFormat.java @@ -19,11 +19,13 @@ package org.apache.commons.csv; import static org.apache.commons.csv.Constants.BACKSLASH; import static org.apache.commons.csv.Constants.COMMA; +import static org.apache.commons.csv.Constants.COMMENT; import static org.apache.commons.csv.Constants.CR; import static org.apache.commons.csv.Constants.CRLF; import static org.apache.commons.csv.Constants.DOUBLE_QUOTE_CHAR; import static org.apache.commons.csv.Constants.LF; import static org.apache.commons.csv.Constants.PIPE; +import static org.apache.commons.csv.Constants.SP; import static org.apache.commons.csv.Constants.TAB; import java.io.IOException; @@ -499,7 +501,8 @@ public final class CSVFormat implements Serializable { * TODO * @param trim * TODO - * @param trailingDelimiter TODO + * @param trailingDelimiter + * TODO * @throws IllegalArgumentException * if the delimiter is a line break character */ @@ -860,6 +863,242 @@ public final class CSVFormat implements Serializable { return new CSVPrinter(out, this); } + /** + * Prints the string as the next value on the line. The value will be escaped or encapsulated as needed. Useful when + * one wants to avoid creating CSVPrinters. + * + * @param value + * value to be output. + * @param out + * where to print the value + * @param newRecord + * is this a new record + * @throws IOException + * If an I/O error occurs + * @since 1.4 + */ + public void print(final Object value, final Appendable out, final boolean newRecord) throws IOException { + // null values are considered empty + // Only call CharSequence.toString() if you have to, helps GC-free use cases. 
+ CharSequence charSequence; + if (value == null) { + charSequence = nullString == null ? Constants.EMPTY : nullString; + } else { + charSequence = value instanceof CharSequence ? (CharSequence) value : value.toString(); + } + charSequence = getTrim() ? trim(charSequence) : charSequence; + this.print(value, charSequence, 0, charSequence.length(), out, newRecord); + } + + private void print(final Object object, final CharSequence value, final int offset, final int len, + final Appendable out, final boolean newRecord) throws IOException { + if (!newRecord) { + out.append(getDelimiter()); + } + if (object == null) { + out.append(value); + } else if (isQuoteCharacterSet()) { + // the original object is needed so can check for Number + printAndQuote(object, value, offset, len, out, newRecord); + } else if (isEscapeCharacterSet()) { + printAndEscape(value, offset, len, out); + } else { + out.append(value, offset, offset + len); + } + } + + /* + * Note: must only be called if escaping is enabled, otherwise will generate NPE + */ + private void printAndEscape(final CharSequence value, final int offset, final int len, final Appendable out) + throws IOException { + int start = offset; + int pos = offset; + final int end = offset + len; + + final char delim = getDelimiter(); + final char escape = getEscapeCharacter().charValue(); + + while (pos < end) { + char c = value.charAt(pos); + if (c == CR || c == LF || c == delim || c == escape) { + // write out segment up until this char + if (pos > start) { + out.append(value, start, pos); + } + if (c == LF) { + c = 'n'; + } else if (c == CR) { + c = 'r'; + } + + out.append(escape); + out.append(c); + + start = pos + 1; // start on the current char after this one + } + + pos++; + } + + // write last segment + if (pos > start) { + out.append(value, start, pos); + } + } + + /* + * Note: must only be called if quoting is enabled, otherwise will generate NPE + */ + // the original object is needed so can check for Number + private void 
printAndQuote(final Object object, final CharSequence value, final int offset, final int len, + final Appendable out, final boolean newRecord) throws IOException { + boolean quote = false; + int start = offset; + int pos = offset; + final int end = offset + len; + + final char delimChar = getDelimiter(); + final char quoteChar = getQuoteCharacter().charValue(); + + QuoteMode quoteModePolicy = getQuoteMode(); + if (quoteModePolicy == null) { + quoteModePolicy = QuoteMode.MINIMAL; + } + switch (quoteModePolicy) { + case ALL: + quote = true; + break; + case NON_NUMERIC: + quote = !(object instanceof Number); + break; + case NONE: + // Use the existing escaping code + printAndEscape(value, offset, len, out); + return; + case MINIMAL: + if (len <= 0) { + // always quote an empty token that is the first + // on the line, as it may be the only thing on the + // line. If it were not quoted in that case, + // an empty line has no tokens. + if (newRecord) { + quote = true; + } + } else { + char c = value.charAt(pos); + + // TODO where did this rule come from? + if (newRecord && (c < '0' || c > '9' && c < 'A' || c > 'Z' && c < 'a' || c > 'z')) { + quote = true; + } else if (c <= COMMENT) { + // Some other chars at the start of a value caused the parser to fail, so for now + // encapsulate if we start in anything less than '#'. We are being conservative + // by including the default comment char too. 
+ quote = true; + } else { + while (pos < end) { + c = value.charAt(pos); + if (c == LF || c == CR || c == quoteChar || c == delimChar) { + quote = true; + break; + } + pos++; + } + + if (!quote) { + pos = end - 1; + c = value.charAt(pos); + // Some other chars at the end caused the parser to fail, so for now + // encapsulate if we end in anything less than ' ' + if (c <= SP) { + quote = true; + } + } + } + } + + if (!quote) { + // no encapsulation needed - write out the original value + out.append(value, start, end); + return; + } + break; + default: + throw new IllegalStateException("Unexpected Quote value: " + quoteModePolicy); + } + + if (!quote) { + // no encapsulation needed - write out the original value + out.append(value, start, end); + return; + } + + // we hit something that needed encapsulation + out.append(quoteChar); + + // Pick up where we left off: pos should be positioned on the first character that caused + // the need for encapsulation. + while (pos < end) { + final char c = value.charAt(pos); + if (c == quoteChar) { + // write out the chunk up until this point + + // add 1 to the length to write out the encapsulator also + out.append(value, start, pos + 1); + // put the next starting position on the encapsulator so we will + // write it out again with the next string (effectively doubling it) + start = pos; + } + pos++; + } + + // write the last segment + out.append(value, start, pos); + out.append(quoteChar); + } + + /** + * Outputs the record separator. + * + * @param out + * where to write + * + * @throws IOException + * If an I/O error occurs + * @since 1.4 + */ + public void println(final Appendable out) throws IOException { + if (getTrailingDelimiter()) { + out.append(getDelimiter()); + } + if (recordSeparator != null) { + out.append(recordSeparator); + } + } + + /** + * Prints the given values as a single record of delimiter separated values followed by the record separator. + * + * <p>

+ * The values will be quoted if needed. Quotes and newline characters will be escaped. This method adds the record + * separator to the output after printing the record, so there is no need to call {@link #println(Appendable)}. + * </p>

+ * + * @param out where to write + * @param values + * values to output. + * @throws IOException + * If an I/O error occurs + * @since 1.4 + */ + public void printRecord(final Appendable out, final Object... values) throws IOException { + for (int i = 0; i < values.length; i++) { + print(values[i], out, i == 0); + } + println(out); + } + @Override public String toString() { final StringBuilder sb = new StringBuilder(); @@ -917,6 +1156,23 @@ public final class CSVFormat implements Serializable { return strings; } + private CharSequence trim(final CharSequence charSequence) { + if (charSequence instanceof String) { + return ((String) charSequence).trim(); + } + final int count = charSequence.length(); + int len = count; + int pos = 0; + + while ((pos < len) && (charSequence.charAt(pos) <= ' ')) { + pos++; + } + while ((pos < len) && (charSequence.charAt(len - 1) <= ' ')) { + len--; + } + return (pos > 0) || (len < count) ? charSequence.subSequence(pos, len) : charSequence; + } + /** * Verifies the consistency of the parameters and throws an IllegalArgumentException if necessary. * @@ -1083,6 +1339,7 @@ public final class CSVFormat implements Serializable { *

* Calling this method is equivalent to calling: *

+ * * <pre>
      * CSVFormat format = aFormat.withHeader().withSkipHeaderRecord();
      * </pre>
@@ -1114,8 +1371,8 @@ public final class CSVFormat implements Serializable { *

* * @param headerEnum - * the enum defining the header, {@code null} if disabled, empty if parsed automatically, user - * specified otherwise. + * the enum defining the header, {@code null} if disabled, empty if parsed automatically, user specified + * otherwise. * * @return A new CSVFormat that is equal to this but with the specified header * @see #withHeader(String...) diff --git a/src/main/java/org/apache/commons/csv/CSVPrinter.java b/src/main/java/org/apache/commons/csv/CSVPrinter.java index cdd2c11e..265d11d0 100644 --- a/src/main/java/org/apache/commons/csv/CSVPrinter.java +++ b/src/main/java/org/apache/commons/csv/CSVPrinter.java @@ -17,7 +17,6 @@ package org.apache.commons.csv; -import static org.apache.commons.csv.Constants.COMMENT; import static org.apache.commons.csv.Constants.CR; import static org.apache.commons.csv.Constants.LF; import static org.apache.commons.csv.Constants.SP; @@ -120,204 +119,10 @@ public final class CSVPrinter implements Flushable, Closeable { * If an I/O error occurs */ public void print(final Object value) throws IOException { - // null values are considered empty - // Only call CharSequence.toString() if you have to, helps GC-free use cases. - CharSequence charSequence; - if (value == null) { - final String nullString = format.getNullString(); - charSequence = nullString == null ? Constants.EMPTY : nullString; - } else { - charSequence = value instanceof CharSequence ? (CharSequence) value : value.toString(); - } - charSequence = format.getTrim() ? 
trim(charSequence) : charSequence; - this.print(value, charSequence, 0, charSequence.length()); - } - - private CharSequence trim(final CharSequence charSequence) { - if (charSequence instanceof String) { - return ((String) charSequence).trim(); - } - final int count = charSequence.length(); - int len = count; - int pos = 0; - - while ((pos < len) && (charSequence.charAt(pos) <= ' ')) { - pos++; - } - while ((pos < len) && (charSequence.charAt(len - 1) <= ' ')) { - len--; - } - return (pos > 0) || (len < count) ? charSequence.subSequence(pos, len) : charSequence; - } - - private void print(final Object object, final CharSequence value, final int offset, final int len) - throws IOException { - if (!newRecord) { - out.append(format.getDelimiter()); - } - if (object == null) { - out.append(value); - } else if (format.isQuoteCharacterSet()) { - // the original object is needed so can check for Number - printAndQuote(object, value, offset, len); - } else if (format.isEscapeCharacterSet()) { - printAndEscape(value, offset, len); - } else { - out.append(value, offset, offset + len); - } + format.print(value, out, newRecord); newRecord = false; } - /* - * Note: must only be called if escaping is enabled, otherwise will generate NPE - */ - private void printAndEscape(final CharSequence value, final int offset, final int len) throws IOException { - int start = offset; - int pos = offset; - final int end = offset + len; - - final char delim = format.getDelimiter(); - final char escape = format.getEscapeCharacter().charValue(); - - while (pos < end) { - char c = value.charAt(pos); - if (c == CR || c == LF || c == delim || c == escape) { - // write out segment up until this char - if (pos > start) { - out.append(value, start, pos); - } - if (c == LF) { - c = 'n'; - } else if (c == CR) { - c = 'r'; - } - - out.append(escape); - out.append(c); - - start = pos + 1; // start on the current char after this one - } - - pos++; - } - - // write last segment - if (pos > start) { - 
out.append(value, start, pos); - } - } - - /* - * Note: must only be called if quoting is enabled, otherwise will generate NPE - */ - // the original object is needed so can check for Number - private void printAndQuote(final Object object, final CharSequence value, final int offset, final int len) - throws IOException { - boolean quote = false; - int start = offset; - int pos = offset; - final int end = offset + len; - - final char delimChar = format.getDelimiter(); - final char quoteChar = format.getQuoteCharacter().charValue(); - - QuoteMode quoteModePolicy = format.getQuoteMode(); - if (quoteModePolicy == null) { - quoteModePolicy = QuoteMode.MINIMAL; - } - switch (quoteModePolicy) { - case ALL: - quote = true; - break; - case NON_NUMERIC: - quote = !(object instanceof Number); - break; - case NONE: - // Use the existing escaping code - printAndEscape(value, offset, len); - return; - case MINIMAL: - if (len <= 0) { - // always quote an empty token that is the first - // on the line, as it may be the only thing on the - // line. If it were not quoted in that case, - // an empty line has no tokens. - if (newRecord) { - quote = true; - } - } else { - char c = value.charAt(pos); - - // TODO where did this rule come from? - if (newRecord && (c < '0' || c > '9' && c < 'A' || c > 'Z' && c < 'a' || c > 'z')) { - quote = true; - } else if (c <= COMMENT) { - // Some other chars at the start of a value caused the parser to fail, so for now - // encapsulate if we start in anything less than '#'. We are being conservative - // by including the default comment char too. 
- quote = true; - } else { - while (pos < end) { - c = value.charAt(pos); - if (c == LF || c == CR || c == quoteChar || c == delimChar) { - quote = true; - break; - } - pos++; - } - - if (!quote) { - pos = end - 1; - c = value.charAt(pos); - // Some other chars at the end caused the parser to fail, so for now - // encapsulate if we end in anything less than ' ' - if (c <= SP) { - quote = true; - } - } - } - } - - if (!quote) { - // no encapsulation needed - write out the original value - out.append(value, start, end); - return; - } - break; - default: - throw new IllegalStateException("Unexpected Quote value: " + quoteModePolicy); - } - - if (!quote) { - // no encapsulation needed - write out the original value - out.append(value, start, end); - return; - } - - // we hit something that needed encapsulation - out.append(quoteChar); - - // Pick up where we left off: pos should be positioned on the first character that caused - // the need for encapsulation. - while (pos < end) { - final char c = value.charAt(pos); - if (c == quoteChar) { - // write out the chunk up until this point - - // add 1 to the length to write out the encapsulator also - out.append(value, start, pos + 1); - // put the next starting position on the encapsulator so we will - // write it out again with the next string (effectively doubling it) - start = pos; - } - pos++; - } - - // write the last segment - out.append(value, start, pos); - out.append(quoteChar); - } - /** * Prints a comment on a new line among the delimiter separated values. 
* @@ -370,13 +175,7 @@ public final class CSVPrinter implements Flushable, Closeable { * If an I/O error occurs */ public void println() throws IOException { - if (format.getTrailingDelimiter()) { - out.append(format.getDelimiter()); - } - final String recordSeparator = format.getRecordSeparator(); - if (recordSeparator != null) { - out.append(recordSeparator); - } + format.println(out); newRecord = true; } @@ -414,10 +213,8 @@ public final class CSVPrinter implements Flushable, Closeable { * If an I/O error occurs */ public void printRecord(final Object... values) throws IOException { - for (final Object value : values) { - print(value); - } - println(); + format.printRecord(out, values); + newRecord = true; } /**