[CSV-209] Create CSVFormat.ORACLE preset. Also: Fix and complete

documentation for other formats.
This commit is contained in:
Gary Gregory 2018-04-03 17:37:03 -06:00
parent 83cd8088a6
commit a9daab6992
6 changed files with 72 additions and 11 deletions

View File

@ -44,6 +44,7 @@
<action issue="CSV-172" type="fix" dev="ggregory" due-to="Andrew Pennebaker">Don't quote cells just because they have UTF-8 encoded characters.</action>
<action issue="CSV-220" type="add" dev="ggregory" due-to="Gary Gregory">Add API org.apache.commons.csv.CSVFormat.withSystemRecordSeparator().</action>
<action issue="CSV-223" type="fix" dev="ggregory" due-to="Samuel Martin">Inconsistency between Javadoc of CSVFormat DEFAULT EXCEL.</action>
<action issue="CSV-209" type="add" dev="ggregory" due-to="Gary Gregory">Create CSVFormat.ORACLE preset.</action>
</release>
<release version="1.5" date="2017-09-03" description="Feature and bug fix release">
<action issue="CSV-203" type="fix" dev="ggregory" due-to="Richard Wheeldon, Kai Paroth">withNullString value is printed without quotes when QuoteMode.ALL is specified; add QuoteMode.ALL_NON_NULL. PR #17.</action>

View File

@ -189,6 +189,11 @@ public final class CSVFormat implements Serializable {
*/
MySQL(CSVFormat.MYSQL),
/**
* @see CSVFormat#ORACLE
* @since 1.6
*/
Oracle(CSVFormat.ORACLE),
/**
* @see CSVFormat#POSTGRESQL_CSV
* @since 1.5
@ -227,7 +232,7 @@ public final class CSVFormat implements Serializable {
}
/**
* Standard comma separated format, as for {@link #RFC4180} but allowing empty lines.
* Standard Comma Separated Value format, as for {@link #RFC4180} but allowing empty lines.
*
* <p>
* Settings are:
@ -377,6 +382,44 @@ public final class CSVFormat implements Serializable {
.withQuoteMode(QuoteMode.ALL_NON_NULL);
// @formatter:on
/**
* Default Oracle format used by the SQL*Loader utility.
*
* <p>
* This is a comma-delimited format with the system line separator as the record separator. Values are double quoted when needed and special
* characters are escaped with {@code '\'}. The default NULL string is {@code "\N"}. Values are trimmed.
* </p>
*
* <p>
* Settings are:
* </p>
* <ul>
* <li>withDelimiter(',') // default is {@code FIELDS TERMINATED BY ','}</li>
* <li>withQuote('"') // default is {@code OPTIONALLY ENCLOSED BY '"'}</li>
* <li>withSystemRecordSeparator()</li>
* <li>withTrim()</li>
* <li>withIgnoreEmptyLines(false)</li>
* <li>withEscape('\\')</li>
* <li>withNullString("\\N")</li>
* <li>withQuoteMode(QuoteMode.MINIMAL)</li>
* </ul>
*
* @see Predefined#Oracle
* @see <a href="https://docs.oracle.com/database/121/SUTIL/GUID-D1762699-8154-40F6-90DE-EFB8EB6A9AB0.htm#SUTIL4217">https://docs.oracle.com/database/121/SUTIL/GUID-D1762699-8154-40F6-90DE-EFB8EB6A9AB0.htm#SUTIL4217</a>
* @since 1.6
*/
// @formatter:off
public static final CSVFormat ORACLE = DEFAULT
.withDelimiter(COMMA)
.withEscape(BACKSLASH)
.withIgnoreEmptyLines(false)
.withQuote(DOUBLE_QUOTE_CHAR)
.withNullString("\\N")
.withTrim()
.withSystemRecordSeparator()
.withQuoteMode(QuoteMode.MINIMAL);
// @formatter:on
/**
* Default PostgreSQL CSV format used by the {@code COPY} operation.
*
@ -399,7 +442,7 @@ public final class CSVFormat implements Serializable {
* </ul>
*
* @see Predefined#MySQL
* @see <a href="http://dev.mysql.com/doc/refman/5.1/en/load-data.html"> http://dev.mysql.com/doc/refman/5.1/en/load
* @see <a href="https://www.postgresql.org/docs/current/static/sql-copy.html">https://www.postgresql.org/docs/current/static/sql-copy.html</a>
* @since 1.5
*/
@ -436,8 +479,7 @@ public final class CSVFormat implements Serializable {
* </ul>
*
* @see Predefined#MySQL
* @see <a href="http://dev.mysql.com/doc/refman/5.1/en/load-data.html"> http://dev.mysql.com/doc/refman/5.1/en/load
* -data.html</a>
* @see <a href="https://www.postgresql.org/docs/current/static/sql-copy.html"> https://www.postgresql.org/docs/current/static/sql-copy.html</a>
* @since 1.5
*/
// @formatter:off
@ -1934,7 +1976,7 @@ public final class CSVFormat implements Serializable {
* only works for inputs with '\n', '\r' and "\r\n"
* </p>
*
* @return A new CSVFormat that is equal to this but with the operating system's line separator stringr
* @return A new CSVFormat that is equal to this but with the operating system's line separator string.
* @since 1.6
*/
public CSVFormat withSystemRecordSeparator() {

View File

@ -30,6 +30,9 @@ limitations under the License.
<li><a href="http://www.ibm.com/support/knowledgecenter/SSBJG3_2.5.0/com.ibm.gen_busug.doc/c_fgl_InOutSql_UNLOAD.htm">Informix UNLOAD</a></li>
<li><a href="http://www.ibm.com/support/knowledgecenter/SSBJG3_2.5.0/com.ibm.gen_busug.doc/c_fgl_InOutSql_UNLOAD.htm">Informix UNLOAD CSV</a></li>
<li><a href="http://dev.mysql.com/doc/refman/5.0/en/mysqldump-delimited-text.html">MySQL</a></li>
<li><a href="https://docs.oracle.com/database/121/SUTIL/GUID-D1762699-8154-40F6-90DE-EFB8EB6A9AB0.htm#SUTIL4217">Oracle</a></li>
<li><a href="https://www.postgresql.org/docs/current/static/sql-copy.html">PostgreSQL CSV</a></li>
<li><a href="https://www.postgresql.org/docs/current/static/sql-copy.html">PostgreSQL Text</a></li>
<li><a href="http://tools.ietf.org/html/rfc4180">RFC 4180</a></li>
<li><a href="http://en.wikipedia.org/wiki/Tab-separated_values">TDF</a></li>
</ul>

View File

@ -34,12 +34,16 @@ limitations under the License.
The CSVFormat class provides some commonly used CSV variants:
<dl>
<dt>EXCEL</dt><dd>The Microsoft Excel CSV format.</dd>
<dt>INFORMIX_UNLOAD</dt><dd>Informix <a href="http://www.ibm.com/support/knowledgecenter/SSBJG3_2.5.0/com.ibm.gen_busug.doc/c_fgl_InOutSql_UNLOAD.htm">UNLOAD</a> format used by the <code>UNLOAD TO file_name</code> operation.</dd>
<dt>INFORMIX_UNLOAD_CSV</dt><dd>Informix <a href="http://www.ibm.com/support/knowledgecenter/SSBJG3_2.5.0/com.ibm.gen_busug.doc/c_fgl_InOutSql_UNLOAD.htm">CSV UNLOAD</a> format used by the <code>UNLOAD TO file_name</code> operation (escaping is disabled.)</dd>
<dt>MYSQL</dt><dd>The Oracle MySQL CSV format.</dd>
<dt>RFC-4180</dt><dd>The RFC-4180 format defined by <a href="https://tools.ietf.org/html/rfc4180">RFC-4180</a></dd>
<dt>TDF</dt><dd>A tab delimited format</dd>
<dt><a href="http://commons.apache.org/proper/commons-csv/apidocs/org/apache/commons/csv/CSVFormat.html#DEFAULT">DEFAULT</a></dt><dd>Standard Comma Separated Value format, as for RFC4180 but allowing empty lines.</dd>
<dt><a href="http://commons.apache.org/proper/commons-csv/apidocs/org/apache/commons/csv/CSVFormat.html#EXCEL">EXCEL</a></dt><dd>The Microsoft Excel CSV format.</dd>
<dt><a href="http://commons.apache.org/proper/commons-csv/apidocs/org/apache/commons/csv/CSVFormat.html#INFORMIX_UNLOAD">INFORMIX_UNLOAD</a></dt><dd>Informix <a href="http://www.ibm.com/support/knowledgecenter/SSBJG3_2.5.0/com.ibm.gen_busug.doc/c_fgl_InOutSql_UNLOAD.htm">UNLOAD</a> format used by the <code>UNLOAD TO file_name</code> operation.</dd>
<dt><a href="http://commons.apache.org/proper/commons-csv/apidocs/org/apache/commons/csv/CSVFormat.html#INFORMIX_UNLOAD_CSV">INFORMIX_UNLOAD_CSV</a></dt><dd>Informix <a href="http://www.ibm.com/support/knowledgecenter/SSBJG3_2.5.0/com.ibm.gen_busug.doc/c_fgl_InOutSql_UNLOAD.htm">CSV UNLOAD</a> format used by the <code>UNLOAD TO file_name</code> operation (escaping is disabled.)</dd>
<dt><a href="http://commons.apache.org/proper/commons-csv/apidocs/org/apache/commons/csv/CSVFormat.html#MYSQL">MYSQL</a></dt><dd>The MySQL CSV format.</dd>
<dt><a href="http://commons.apache.org/proper/commons-csv/apidocs/org/apache/commons/csv/CSVFormat.html#ORACLE">ORACLE</a></dt><dd>Default Oracle format used by the SQL*Loader utility.</dd>
<dt><a href="http://commons.apache.org/proper/commons-csv/apidocs/org/apache/commons/csv/CSVFormat.html#POSTGRESQL_CSV">POSTGRESQL_CSV</a></dt><dd>Default PostgreSQL CSV format used by the COPY operation.</dd>
<dt><a href="http://commons.apache.org/proper/commons-csv/apidocs/org/apache/commons/csv/CSVFormat.html#POSTGRESQL_TEXT">POSTGRESQL_TEXT</a></dt><dd>Default PostgreSQL text format used by the COPY operation.</dd>
<dt><a href="http://commons.apache.org/proper/commons-csv/apidocs/org/apache/commons/csv/CSVFormat.html#RFC4180">RFC-4180</a></dt><dd>The RFC-4180 format defined by <a href="https://tools.ietf.org/html/rfc4180">RFC-4180</a>.</dd>
<dt><a href="http://commons.apache.org/proper/commons-csv/apidocs/org/apache/commons/csv/CSVFormat.html#TDF">TDF</a></dt><dd>A tab delimited format.</dd>
</dl>
<subsection name="Example: Parsing an Excel CSV File">

View File

@ -45,6 +45,11 @@ public class CSVFormatPredefinedTest {
test(CSVFormat.MYSQL, "MySQL");
}
/**
* Passes {@link CSVFormat#ORACLE} and the predefined name {@code "Oracle"} to the shared {@code test} helper.
*/
@Test
public void testOracle() {
    final CSVFormat oracleFormat = CSVFormat.ORACLE;
    final String predefinedName = "Oracle";
    test(oracleFormat, predefinedName);
}
@Test
public void testPostgreSqlCsv() {
test(CSVFormat.POSTGRESQL_CSV, "PostgreSQLCsv");

View File

@ -1182,6 +1182,12 @@ public class CSVPrinterTest {
doRandom(CSVFormat.MYSQL, ITERATIONS_FOR_RANDOM_TEST);
}
/**
* Runs the {@code doRandom} helper against {@link CSVFormat#ORACLE} for {@code ITERATIONS_FOR_RANDOM_TEST}.
* The test is currently disabled through {@code @Ignore}.
*
* @throws Exception declared by the test signature
*/
@Test
@Ignore
public void testRandomOracle() throws Exception {
    final CSVFormat oracleFormat = CSVFormat.ORACLE;
    doRandom(oracleFormat, ITERATIONS_FOR_RANDOM_TEST);
}
@Test
@Ignore
public void testRandomPostgreSqlCsv() throws Exception {