CSVStrategy is now immutable (SANDBOX-279)

git-svn-id: https://svn.apache.org/repos/asf/commons/sandbox/csv/trunk@1199827 13f79535-47bb-0310-9956-ffa450edef68
2025-03-02 23:09:17 +00:00 · 2011-11-09 16:21:23 +00:00 · 2011-11-09 16:21:23 +00:00 · 42476f4b08
commit 42476f4b08
parent fc4ccb426e
5 changed files with 126 additions and 136 deletions
--- a/src/main/java/org/apache/commons/csv/CSVParser.java
+++ b/src/main/java/org/apache/commons/csv/CSVParser.java
@ -122,7 +122,7 @@ public class CSVParser {
     * @param input a Reader containing "csv-formatted" input
     */
    public CSVParser(Reader input) {
-        this(input, (CSVStrategy) CSVStrategy.DEFAULT_STRATEGY.clone());
+        this(input, CSVStrategy.DEFAULT_STRATEGY);
    }

    /**
@ -260,7 +260,7 @@ public class CSVParser {
        c = in.readAgain();

        //  empty line detection: eol AND (last char was EOL or beginning)
-        while (strategy.getIgnoreEmptyLines() && eol
+        while (strategy.isEmptyLinesIgnored() && eol
                && (lastChar == '\n'
                || lastChar == '\r'
                || lastChar == ExtendedBufferedReader.UNDEFINED)
@ -286,7 +286,7 @@ public class CSVParser {
        //  important: make sure a new char gets consumed in each iteration
        while (!tkn.isReady && tkn.type != TT_EOF) {
            // ignore whitespaces at beginning of a token
-            while (strategy.getIgnoreLeadingWhitespaces() && isWhitespace(c) && !eol) {
+            while (strategy.isLeadingSpacesIgnored() && isWhitespace(c) && !eol) {
                wsBuf.append((char) c);
                c = in.read();
                eol = isEndOfLine(c);
@ -316,7 +316,7 @@ public class CSVParser {
            } else {
                // next token must be a simple token
                // add removed blanks when not ignoring whitespace chars...
-                if (!strategy.getIgnoreLeadingWhitespaces()) {
+                if (!strategy.isLeadingSpacesIgnored()) {
                    tkn.content.append(wsBuf);
                }
                simpleTokenLexer(tkn, c);
@ -359,7 +359,7 @@ public class CSVParser {
                tkn.type = TT_TOKEN;
                tkn.isReady = true;
                break;
-            } else if (c == '\\' && strategy.getUnicodeEscapeInterpretation() && in.lookAhead() == 'u') {
+            } else if (c == '\\' && strategy.isUnicodeEscapesInterpreted() && in.lookAhead() == 'u') {
                // interpret unicode escaped chars (like \u0070 -> p)
                tkn.content.append((char) unicodeEscapeLexer(c));
            } else if (c == strategy.getEscape()) {
@ -371,7 +371,7 @@ public class CSVParser {
            c = in.read();
        }

-        if (strategy.getIgnoreTrailingWhitespaces()) {
+        if (strategy.isTrailingSpacesIgnored()) {
            tkn.content.trimTrailingWhitespace();
        }

@ -400,7 +400,7 @@ public class CSVParser {
        for (; ;) {
            c = in.read();

-            if (c == '\\' && strategy.getUnicodeEscapeInterpretation() && in.lookAhead() == 'u') {
+            if (c == '\\' && strategy.isUnicodeEscapesInterpreted() && in.lookAhead() == 'u') {
                tkn.content.append((char) unicodeEscapeLexer(c));
            } else if (c == strategy.getEscape()) {
                tkn.content.append((char) readEscape(c));
--- a/src/main/java/org/apache/commons/csv/CSVPrinter.java
+++ b/src/main/java/org/apache/commons/csv/CSVPrinter.java
@ -58,7 +58,7 @@ public class CSVPrinter {
     * Output a blank line
     */
    public void println() throws IOException {
-        out.write(strategy.getPrinterNewline());
+        out.write(strategy.getLineSeparator());
        newLine = true;
    }

--- a/src/main/java/org/apache/commons/csv/CSVStrategy.java
+++ b/src/main/java/org/apache/commons/csv/CSVStrategy.java
@ -26,17 +26,15 @@ import java.io.Serializable;
 */
 public class CSVStrategy implements Cloneable, Serializable {

-    private char delimiter;
-    private char encapsulator;
-    private char commentStart;
-    private char escape;
-    private boolean ignoreLeadingWhitespaces;
-    private boolean ignoreTrailingWhitespaces;
-    private boolean interpretUnicodeEscapes;
-    private boolean ignoreEmptyLines;
-
-    // controls for output
-    private String printerNewline = "\n";
+    private char delimiter = ',';
+    private char encapsulator = '"';
+    private char commentStart = COMMENTS_DISABLED;
+    private char escape = ESCAPE_DISABLED;
+    private boolean leadingSpacesIgnored = true;
+    private boolean trailingSpacesIgnored = true;
+    private boolean unicodeEscapesInterpreted = false;
+    private boolean emptyLinesIgnored = true;
+    private String lineSeparator = "\n";

    // -2 is used to signal disabled, because it won't be confused with
    // an EOF signal (-1), and because \ufffe in UTF-16 would be
@ -46,11 +44,22 @@ public class CSVStrategy implements Cloneable, Serializable {
    public static final char ESCAPE_DISABLED = (char) -2;
    public static final char ENCAPSULATOR_DISABLED = (char) -2;

+    /** Standard comma separated format. */
    public static final CSVStrategy DEFAULT_STRATEGY = new CSVStrategy(',', '"', COMMENTS_DISABLED, ESCAPE_DISABLED, true, true, false, true);
+    
+    /** Excel file format (using a comma as the value delimiter). */
    public static final CSVStrategy EXCEL_STRATEGY = new CSVStrategy(',', '"', COMMENTS_DISABLED, ESCAPE_DISABLED, false, false, false, false);
+    
+    /** Tabulation delimited format. */
    public static final CSVStrategy TDF_STRATEGY = new CSVStrategy('\t', '"', COMMENTS_DISABLED, ESCAPE_DISABLED, true, true, false, true);


+    /**
+     * Creates a CSVStrategy with the default parameters.
+     */
+    public CSVStrategy() {
+    }
+
    public CSVStrategy(char delimiter, char encapsulator, char commentStart) {
        this(delimiter, encapsulator, commentStart, ESCAPE_DISABLED, true, true, false, true);
    }
@ -62,103 +71,129 @@ public class CSVStrategy implements Cloneable, Serializable {
     * @param encapsulator              a char used as value encapsulation marker
     * @param commentStart              a char used for comment identification
     * @param escape                    a char used to escape special characters in values
-     * @param ignoreLeadingWhitespaces  TRUE when leading whitespaces should be ignored
-     * @param ignoreTrailingWhitespaces TRUE when trailing whitespaces should be ignored
-     * @param interpretUnicodeEscapes   TRUE when unicode escapes should be interpreted
-     * @param ignoreEmptyLines          TRUE when the parser should skip emtpy lines
+     * @param leadingSpacesIgnored      TRUE when leading whitespaces should be ignored
+     * @param trailingSpacesIgnored     TRUE when trailing whitespaces should be ignored
+     * @param unicodeEscapesInterpreted TRUE when unicode escapes should be interpreted
+     * @param emptyLinesIgnored         TRUE when the parser should skip empty lines
     */
    public CSVStrategy(
            char delimiter,
            char encapsulator,
            char commentStart,
            char escape,
-            boolean ignoreLeadingWhitespaces,
-            boolean ignoreTrailingWhitespaces,
-            boolean interpretUnicodeEscapes,
-            boolean ignoreEmptyLines) {
+            boolean leadingSpacesIgnored,
+            boolean trailingSpacesIgnored,
+            boolean unicodeEscapesInterpreted,
+            boolean emptyLinesIgnored) {
        this.delimiter = delimiter;
        this.encapsulator = encapsulator;
        this.commentStart = commentStart;
        this.escape = escape;
-        this.ignoreLeadingWhitespaces = ignoreLeadingWhitespaces;
-        this.ignoreTrailingWhitespaces = ignoreTrailingWhitespaces;
-        this.interpretUnicodeEscapes = interpretUnicodeEscapes;
-        this.ignoreEmptyLines = ignoreEmptyLines;
-    }
-
-    public void setDelimiter(char delimiter) {
-        this.delimiter = delimiter;
+        this.leadingSpacesIgnored = leadingSpacesIgnored;
+        this.trailingSpacesIgnored = trailingSpacesIgnored;
+        this.unicodeEscapesInterpreted = unicodeEscapesInterpreted;
+        this.emptyLinesIgnored = emptyLinesIgnored;
    }

    public char getDelimiter() {
-        return this.delimiter;
+        return delimiter;
    }

-    public void setEncapsulator(char encapsulator) {
-        this.encapsulator = encapsulator;
+    public CSVStrategy withDelimiter(char delimiter) {
+        CSVStrategy strategy = (CSVStrategy) clone(); // work on a copy so this instance stays unchanged
+        strategy.delimiter = delimiter; // set the delimiter on the copy, not on this instance
+        return strategy;
     }

    public char getEncapsulator() {
-        return this.encapsulator;
+        return encapsulator;
    }

-    public void setCommentStart(char commentStart) {
-        this.commentStart = commentStart;
+    public CSVStrategy withEncapsulator(char encapsulator) {
+        CSVStrategy strategy = (CSVStrategy) clone();
+        strategy.encapsulator = encapsulator;
+        return strategy;
    }

    public char getCommentStart() {
-        return this.commentStart;
+        return commentStart;
+    }
+
+    public CSVStrategy withCommentStart(char commentStart) {
+        CSVStrategy strategy = (CSVStrategy) clone();
+        strategy.commentStart = commentStart;
+        return strategy;
    }

    public boolean isCommentingDisabled() {
        return this.commentStart == COMMENTS_DISABLED;
    }

-    public void setEscape(char escape) {
-        this.escape = escape;
-    }
-
    public char getEscape() {
-        return this.escape;
+        return escape;
    }

-    public void setIgnoreLeadingWhitespaces(boolean ignoreLeadingWhitespaces) {
-        this.ignoreLeadingWhitespaces = ignoreLeadingWhitespaces;
+    public CSVStrategy withEscape(char escape) {
+        CSVStrategy strategy = (CSVStrategy) clone();
+        strategy.escape = escape;
+        return strategy;
    }

-    public boolean getIgnoreLeadingWhitespaces() {
-        return this.ignoreLeadingWhitespaces;
+    public boolean isLeadingSpacesIgnored() {
+        return leadingSpacesIgnored;
    }

-    public void setIgnoreTrailingWhitespaces(boolean ignoreTrailingWhitespaces) {
-        this.ignoreTrailingWhitespaces = ignoreTrailingWhitespaces;
+    public CSVStrategy withLeadingSpacesIgnored(boolean leadingSpacesIgnored) {
+        CSVStrategy strategy = (CSVStrategy) clone();
+        strategy.leadingSpacesIgnored = leadingSpacesIgnored;
+        return strategy;
    }

-    public boolean getIgnoreTrailingWhitespaces() {
-        return this.ignoreTrailingWhitespaces;
+    public boolean isTrailingSpacesIgnored() {
+        return trailingSpacesIgnored;
    }

-    public void setUnicodeEscapeInterpretation(boolean interpretUnicodeEscapes) {
-        this.interpretUnicodeEscapes = interpretUnicodeEscapes;
+    public CSVStrategy withTrailingSpacesIgnored(boolean trailingSpacesIgnored) {
+        CSVStrategy strategy = (CSVStrategy) clone();
+        strategy.trailingSpacesIgnored = trailingSpacesIgnored;
+        return strategy;
    }

-    public boolean getUnicodeEscapeInterpretation() {
-        return this.interpretUnicodeEscapes;
+    public boolean isUnicodeEscapesInterpreted() {
+        return unicodeEscapesInterpreted;
    }

-    public boolean getIgnoreEmptyLines() {
-        return this.ignoreEmptyLines;
+    public CSVStrategy withUnicodeEscapesInterpreted(boolean unicodeEscapesInterpreted) {
+        CSVStrategy strategy = (CSVStrategy) clone();
+        strategy.unicodeEscapesInterpreted = unicodeEscapesInterpreted;
+        return strategy;
    }

-    public String getPrinterNewline() {
-        return this.printerNewline;
+    public boolean isEmptyLinesIgnored() {
+        return emptyLinesIgnored;
    }

-    public Object clone() {
+    public CSVStrategy withEmptyLinesIgnored(boolean emptyLinesIgnored) {
+        CSVStrategy strategy = (CSVStrategy) clone();
+        strategy.emptyLinesIgnored = emptyLinesIgnored;
+        return strategy;
+    }
+
+    public String getLineSeparator() {
+        return lineSeparator;
+    }
+
+    public CSVStrategy withLineSeparator(String lineSeparator) {
+        CSVStrategy strategy = (CSVStrategy) clone();
+        strategy.lineSeparator = lineSeparator;
+        return strategy;
+    }
+
+    protected Object clone() {
        try {
            return super.clone();
        } catch (CloneNotSupportedException e) {
-            throw new RuntimeException(e);  // impossible
+            throw (Error) new InternalError().initCause(e);
        }
    }
 }
--- a/src/test/java/org/apache/commons/csv/CSVParserTest.java
+++ b/src/test/java/org/apache/commons/csv/CSVParserTest.java
@ -94,10 +94,8 @@ public class CSVParserTest extends TestCase {
        *
        */
        String code = "1,2,3,\na,b x,c\n#foo\n\nd,e,\n\n";
-        CSVStrategy strategy = (CSVStrategy) CSVStrategy.DEFAULT_STRATEGY.clone();
-        // strategy.setIgnoreEmptyLines(false);
-        strategy.setCommentStart('#');
-
+        CSVStrategy strategy = CSVStrategy.DEFAULT_STRATEGY.withCommentStart('#');
+        
        TestCSVParser parser = new TestCSVParser(new StringReader(code), strategy);


@ -123,8 +121,7 @@ public class CSVParserTest extends TestCase {
        *       \,,
        */
        String code = "a,\\,,b\n\\,,";
-        CSVStrategy strategy = (CSVStrategy) CSVStrategy.DEFAULT_STRATEGY.clone();
-        strategy.setCommentStart('#');
+        CSVStrategy strategy = CSVStrategy.DEFAULT_STRATEGY.withCommentStart('#');
        TestCSVParser parser = new TestCSVParser(new StringReader(code), strategy);

        assertEquals(CSVParser.TT_TOKEN + ";a;", parser.testNextToken());
@ -520,8 +517,7 @@ public class CSVParserTest extends TestCase {

    public void testUnicodeEscape() throws IOException {
        String code = "abc,\\u0070\\u0075\\u0062\\u006C\\u0069\\u0063";
-        CSVParser parser = new CSVParser(new StringReader(code));
-        parser.getStrategy().setUnicodeEscapeInterpretation(true);
+        CSVParser parser = new CSVParser(new StringReader(code), CSVStrategy.DEFAULT_STRATEGY.withUnicodeEscapesInterpreted(true));
        String[] data = parser.getLine();
        assertEquals(2, data.length);
        assertEquals("abc", data[0]);
--- a/src/test/java/org/apache/commons/csv/CSVStrategyTest.java
+++ b/src/test/java/org/apache/commons/csv/CSVStrategyTest.java
@ -14,76 +14,46 @@
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
+
 package org.apache.commons.csv;

 import junit.framework.TestCase;

-/**
- * CSVStrategyTest
- *
- * The test are organized in three different sections:
- * The 'setter/getter' section, the lexer section and finally the strategy
- * section. In case a test fails, you should follow a top-down approach for
- * fixing a potential bug (its likely that the strategy itself fails if the lexer
- * has problems...).
- */
 public class CSVStrategyTest extends TestCase {

-    // ======================================================
-    //   getters / setters
-    // ======================================================
-    public void testGetSetCommentStart() {
-        CSVStrategy strategy = (CSVStrategy) CSVStrategy.DEFAULT_STRATEGY.clone();
-        strategy.setCommentStart('#');
-        assertEquals(strategy.getCommentStart(), '#');
-        strategy.setCommentStart('!');
-        assertEquals(strategy.getCommentStart(), '!');
-    }
+    public void testImmutalibity() {
+        CSVStrategy strategy1 = new CSVStrategy('!', '!', '!', '!', true, true, true, true);
+        CSVStrategy strategy2 = strategy1.withDelimiter('?')
+                                         .withEncapsulator('?')
+                                         .withCommentStart('?')
+                                         .withLineSeparator("?")
+                                         .withEscape('?')
+                                         .withLeadingSpacesIgnored(false)
+                                         .withTrailingSpacesIgnored(false)
+                                         .withEmptyLinesIgnored(false)
+                                         .withUnicodeEscapesInterpreted(false);

-    public void testGetSetEncapsulator() {
-        CSVStrategy strategy = (CSVStrategy) CSVStrategy.DEFAULT_STRATEGY.clone();
-        strategy.setEncapsulator('"');
-        assertEquals(strategy.getEncapsulator(), '"');
-        strategy.setEncapsulator('\'');
-        assertEquals(strategy.getEncapsulator(), '\'');
-    }
-
-    public void testGetSetDelimiter() {
-        CSVStrategy strategy = (CSVStrategy) CSVStrategy.DEFAULT_STRATEGY.clone();
-        strategy.setDelimiter(';');
-        assertEquals(strategy.getDelimiter(), ';');
-        strategy.setDelimiter(',');
-        assertEquals(strategy.getDelimiter(), ',');
-        strategy.setDelimiter('\t');
-        assertEquals(strategy.getDelimiter(), '\t');
-    }
-
-    public void testSetCSVStrategy() {
-        CSVStrategy strategy = CSVStrategy.DEFAULT_STRATEGY;
-        // default settings
-        assertEquals(strategy.getDelimiter(), ',');
-        assertEquals(strategy.getEncapsulator(), '"');
-        assertEquals(strategy.getCommentStart(), CSVStrategy.COMMENTS_DISABLED);
-        assertEquals(true, strategy.getIgnoreLeadingWhitespaces());
-        assertEquals(false, strategy.getUnicodeEscapeInterpretation());
-        assertEquals(true, strategy.getIgnoreEmptyLines());
-        // explicit csv settings
-        assertEquals(strategy.getDelimiter(), ',');
-        assertEquals(strategy.getEncapsulator(), '"');
-        assertEquals(strategy.getCommentStart(), CSVStrategy.COMMENTS_DISABLED);
-        assertEquals(true, strategy.getIgnoreLeadingWhitespaces());
-        assertEquals(false, strategy.getUnicodeEscapeInterpretation());
-        assertEquals(true, strategy.getIgnoreEmptyLines());
-    }
-
-    public void testSetExcelStrategy() {
-        CSVStrategy strategy = CSVStrategy.EXCEL_STRATEGY;
-        assertEquals(strategy.getDelimiter(), ',');
-        assertEquals(strategy.getEncapsulator(), '"');
-        assertEquals(strategy.getCommentStart(), CSVStrategy.COMMENTS_DISABLED);
-        assertEquals(false, strategy.getIgnoreLeadingWhitespaces());
-        assertEquals(false, strategy.getUnicodeEscapeInterpretation());
-        assertEquals(false, strategy.getIgnoreEmptyLines());
+        // the original strategy must be left untouched by the with* calls
+        assertEquals('!', strategy1.getDelimiter());
+        assertEquals('!', strategy1.getEncapsulator());
+        assertEquals('!', strategy1.getCommentStart());
+        assertEquals('!', strategy1.getEscape());
+        assertEquals("\n", strategy1.getLineSeparator());
+        assertTrue(strategy1.isLeadingSpacesIgnored());
+        assertTrue(strategy1.isTrailingSpacesIgnored());
+        assertTrue(strategy1.isEmptyLinesIgnored());
+        assertTrue(strategy1.isUnicodeEscapesInterpreted());
+
+        // the derived strategy must carry every value passed to the with* calls
+        assertEquals('?', strategy2.getDelimiter());
+        assertEquals('?', strategy2.getEncapsulator());
+        assertEquals('?', strategy2.getCommentStart());
+        assertEquals('?', strategy2.getEscape());
+        assertEquals("?", strategy2.getLineSeparator());
+        assertFalse(strategy2.isLeadingSpacesIgnored());
+        assertFalse(strategy2.isTrailingSpacesIgnored());
+        assertFalse(strategy2.isEmptyLinesIgnored());
+        assertFalse(strategy2.isUnicodeEscapesInterpreted());
     }

 }