CSV-54 Confusing semantic of the ignore leading/trailing spaces parameters

git-svn-id: https://svn.apache.org/repos/asf/commons/proper/csv/trunk@1305494 13f79535-47bb-0310-9956-ffa450edef68
2012-03-26 19:02:30 +00:00 · 2012-03-26 19:02:30 +00:00 · 38741a48c6
parent 5063b16c7a
commit 38741a48c6
7 changed files with 38 additions and 82 deletions
--- a/src/main/java/org/apache/commons/csv/CSVFormat.java
+++ b/src/main/java/org/apache/commons/csv/CSVFormat.java
@@ -36,8 +36,7 @@ public class CSVFormat implements Serializable {
    private final char encapsulator;
    private final char commentStart;
    private final char escape;
-    private final boolean leadingSpacesIgnored;
-    private final boolean trailingSpacesIgnored;
+    private final boolean surroundingSpacesIgnored; // Should leading/trailing spaces be ignored around values?
    private final boolean emptyLinesIgnored;
    private final String lineSeparator; // for outputs
    private final String[] header;
@@ -55,7 +54,7 @@ public class CSVFormat implements Serializable {
     * Starting format with no settings defined; used for creating other formats from scratch.
     */
    private static CSVFormat PRISTINE = 
-            new CSVFormat(DISABLED, DISABLED, DISABLED, DISABLED, false, false, false, null, null);
+            new CSVFormat(DISABLED, DISABLED, DISABLED, DISABLED, false, false, null, null);

    /** 
     * Standard comma separated format, as for {@link #RFC4180} but allowing blank lines. 
@@ -113,8 +112,7 @@ public class CSVFormat implements Serializable {
            PRISTINE
            .withDelimiter('\t')
            .withEncapsulator('"')
-            .withLeadingSpacesIgnored(true)
-            .withTrailingSpacesIgnored(true)
+            .withSurroundingSpacesIgnored(true)
            .withEmptyLinesIgnored(true)
            .withLineSeparator(CRLF)
            ;
@@ -142,8 +140,7 @@ public class CSVFormat implements Serializable {
     * @param encapsulator              the char used as value encapsulation marker
     * @param commentStart              the char used for comment identification
     * @param escape                    the char used to escape special characters in values
-     * @param leadingSpacesIgnored      <tt>true</tt> when leading whitespaces should be ignored
-     * @param trailingSpacesIgnored     <tt>true</tt> when trailing whitespaces should be ignored
+     * @param surroundingSpacesIgnored  <tt>true</tt> when whitespaces enclosing values should be ignored
     * @param emptyLinesIgnored         <tt>true</tt> when the parser should skip emtpy lines
     * @param lineSeparator             the line separator to use for output
     * @param header                    the header
@@ -153,8 +150,7 @@ public class CSVFormat implements Serializable {
            char encapsulator,
            char commentStart,
            char escape,
-            boolean leadingSpacesIgnored,
-            boolean trailingSpacesIgnored,
+            boolean surroundingSpacesIgnored,
            boolean emptyLinesIgnored,
            String lineSeparator,
            String[] header) {
@@ -162,8 +158,7 @@ public class CSVFormat implements Serializable {
        this.encapsulator = encapsulator;
        this.commentStart = commentStart;
        this.escape = escape;
-        this.leadingSpacesIgnored = leadingSpacesIgnored;
-        this.trailingSpacesIgnored = trailingSpacesIgnored;
+        this.surroundingSpacesIgnored = surroundingSpacesIgnored;
        this.emptyLinesIgnored = emptyLinesIgnored;
        this.lineSeparator = lineSeparator;
        this.header = header;
@@ -226,7 +221,7 @@ public class CSVFormat implements Serializable {
            throw new IllegalArgumentException("The delimiter cannot be a line break");
        }

-        return new CSVFormat(delimiter, encapsulator, commentStart, escape, leadingSpacesIgnored, trailingSpacesIgnored, emptyLinesIgnored, lineSeparator, header);
+        return new CSVFormat(delimiter, encapsulator, commentStart, escape, surroundingSpacesIgnored, emptyLinesIgnored, lineSeparator, header);
    }

    /**
@@ -250,7 +245,7 @@ public class CSVFormat implements Serializable {
            throw new IllegalArgumentException("The encapsulator cannot be a line break");
        }
        
-        return new CSVFormat(delimiter, encapsulator, commentStart, escape, leadingSpacesIgnored, trailingSpacesIgnored, emptyLinesIgnored, lineSeparator, header);
+        return new CSVFormat(delimiter, encapsulator, commentStart, escape, surroundingSpacesIgnored, emptyLinesIgnored, lineSeparator, header);
    }

    boolean isEncapsulating() {
@@ -278,7 +273,7 @@ public class CSVFormat implements Serializable {
            throw new IllegalArgumentException("The comment start character cannot be a line break");
        }
        
-        return new CSVFormat(delimiter, encapsulator, commentStart, escape, leadingSpacesIgnored, trailingSpacesIgnored, emptyLinesIgnored, lineSeparator, header);
+        return new CSVFormat(delimiter, encapsulator, commentStart, escape, surroundingSpacesIgnored, emptyLinesIgnored, lineSeparator, header);
    }

    /**
@@ -311,7 +306,7 @@ public class CSVFormat implements Serializable {
            throw new IllegalArgumentException("The escape character cannot be a line break");
        }
        
-        return new CSVFormat(delimiter, encapsulator, commentStart, escape, leadingSpacesIgnored, trailingSpacesIgnored, emptyLinesIgnored, lineSeparator, header);
+        return new CSVFormat(delimiter, encapsulator, commentStart, escape, surroundingSpacesIgnored, emptyLinesIgnored, lineSeparator, header);
    }

    boolean isEscaping() {
@@ -319,43 +314,12 @@ public class CSVFormat implements Serializable {
    }

    /**
-     * Tells if the spaces characters at the beginning of the values are ignored when parsing a file.
+     * Specifies whether spaces around values are ignored when parsing input.
     * 
-     * @return <tt>true</tt> if leading spaces are removed, <tt>false</tt> if they are preserved.
+     * @return <tt>true</tt> if spaces around values are ignored, <tt>false</tt> if they are treated as part of the value.
     */
-    public boolean isLeadingSpacesIgnored() {
-        return leadingSpacesIgnored;
-    }
-
-    /**
-     * Returns a copy of this format with the specified left trimming behavior.
-     *
-     * @param leadingSpacesIgnored the left trimming behavior, <tt>true</tt> to remove the leading spaces,
-     *                             <tt>false</tt> to leave the spaces as is.
-     * @return A copy of this format with the specified left trimming behavior.
-     */
-    public CSVFormat withLeadingSpacesIgnored(boolean leadingSpacesIgnored) {
-        return new CSVFormat(delimiter, encapsulator, commentStart, escape, leadingSpacesIgnored, trailingSpacesIgnored, emptyLinesIgnored, lineSeparator, header);
-    }
-
-    /**
-     * Tells if the spaces characters at the end of the values are ignored when parsing a file.
-     * 
-     * @return <tt>true</tt> if trailing spaces are removed, <tt>false</tt> if they are preserved.
-     */
-    public boolean isTrailingSpacesIgnored() {
-        return trailingSpacesIgnored;
-    }
-
-    /**
-     * Returns a copy of this format with the specified right trimming behavior.
-     *
-     * @param trailingSpacesIgnored the right trimming behavior, <tt>true</tt> to remove the trailing spaces,
-     *                              <tt>false</tt> to leave the spaces as is.
-     * @return A copy of this format with the specified right trimming behavior.
-     */
-    public CSVFormat withTrailingSpacesIgnored(boolean trailingSpacesIgnored) {
-        return new CSVFormat(delimiter, encapsulator, commentStart, escape, leadingSpacesIgnored, trailingSpacesIgnored, emptyLinesIgnored, lineSeparator, header);
+    public boolean isSurroundingSpacesIgnored() {
+        return surroundingSpacesIgnored;
    }

    /**
@@ -366,7 +330,7 @@ public class CSVFormat implements Serializable {
     * @return A copy of this format with the specified trimming behavior.
     */
    public CSVFormat withSurroundingSpacesIgnored(boolean surroundingSpacesIgnored) {
-        return new CSVFormat(delimiter, encapsulator, commentStart, escape, surroundingSpacesIgnored, surroundingSpacesIgnored, emptyLinesIgnored, lineSeparator, header);
+        return new CSVFormat(delimiter, encapsulator, commentStart, escape, surroundingSpacesIgnored, emptyLinesIgnored, lineSeparator, header);
    }

    /**
@@ -386,7 +350,7 @@ public class CSVFormat implements Serializable {
     * @return A copy of this format  with the specified empty line skipping behavior.
     */
    public CSVFormat withEmptyLinesIgnored(boolean emptyLinesIgnored) {
-        return new CSVFormat(delimiter, encapsulator, commentStart, escape, leadingSpacesIgnored, trailingSpacesIgnored, emptyLinesIgnored, lineSeparator, header);
+        return new CSVFormat(delimiter, encapsulator, commentStart, escape, surroundingSpacesIgnored, emptyLinesIgnored, lineSeparator, header);
    }

    /**
@@ -406,7 +370,7 @@ public class CSVFormat implements Serializable {
     * @return A copy of this format using the specified output line separator
     */
    public CSVFormat withLineSeparator(String lineSeparator) {
-        return new CSVFormat(delimiter, encapsulator, commentStart, escape, leadingSpacesIgnored, trailingSpacesIgnored, emptyLinesIgnored, lineSeparator, header);
+        return new CSVFormat(delimiter, encapsulator, commentStart, escape, surroundingSpacesIgnored, emptyLinesIgnored, lineSeparator, header);
    }

    String[] getHeader() {
@@ -428,7 +392,7 @@ public class CSVFormat implements Serializable {
     * @return A copy of this format using the specified header
     */
    public CSVFormat withHeader(String... header) {
-        return new CSVFormat(delimiter, encapsulator, commentStart, escape, leadingSpacesIgnored, trailingSpacesIgnored, emptyLinesIgnored, lineSeparator, header);
+        return new CSVFormat(delimiter, encapsulator, commentStart, escape, surroundingSpacesIgnored, emptyLinesIgnored, lineSeparator, header);
    }

    /**
--- a/src/main/java/org/apache/commons/csv/CSVLexer.java
+++ b/src/main/java/org/apache/commons/csv/CSVLexer.java
@@ -81,7 +81,7 @@ class CSVLexer extends Lexer {
        //  important: make sure a new char gets consumed in each iteration
        while (tkn.type == INVALID) {
            // ignore whitespaces at beginning of a token
-            if (leadingSpacesIgnored) {
+            if (surroundingSpacesIgnored) {
                while (isWhitespace(c) && !eol) {
                    c = in.read();
                    eol = isEndOfLine(c);
@@ -158,7 +158,7 @@ class CSVLexer extends Lexer {
            c = in.read();
        }

-        if (trailingSpacesIgnored) {
+        if (surroundingSpacesIgnored) {
            trimTrailingSpaces(tkn.content);
        }

--- a/src/main/java/org/apache/commons/csv/Lexer.java
+++ b/src/main/java/org/apache/commons/csv/Lexer.java
@@ -34,8 +34,7 @@ abstract class Lexer {
    private final char encapsulator;
    private final char commmentStart;
    
-    final boolean leadingSpacesIgnored;
-    final boolean trailingSpacesIgnored;
+    final boolean surroundingSpacesIgnored;
    final boolean emptyLinesIgnored;
    
    final CSVFormat format;
@@ -53,8 +52,7 @@ abstract class Lexer {
        this.escape = format.getEscape();
        this.encapsulator = format.getEncapsulator();
        this.commmentStart = format.getCommentStart();
-        this.leadingSpacesIgnored = format.isLeadingSpacesIgnored();
-        this.trailingSpacesIgnored = format.isTrailingSpacesIgnored();
+        this.surroundingSpacesIgnored = format.isSurroundingSpacesIgnored();
        this.emptyLinesIgnored = format.isEmptyLinesIgnored();
    }

--- a/src/test/java/org/apache/commons/csv/CSVFormatTest.java
+++ b/src/test/java/org/apache/commons/csv/CSVFormatTest.java
@@ -30,15 +30,14 @@ public class CSVFormatTest {

    @Test
    public void testImmutalibity() {
-        CSVFormat format = new CSVFormat('!', '!', '!', '!', true, true, true, "\r\n", null);
+        CSVFormat format = new CSVFormat('!', '!', '!', '!', true, true, "\r\n", null);
        
        format.withDelimiter('?');
        format.withEncapsulator('?');
        format.withCommentStart('?');
        format.withLineSeparator("?");
        format.withEscape('?');
-        format.withLeadingSpacesIgnored(false);
-        format.withTrailingSpacesIgnored(false);
+        format.withSurroundingSpacesIgnored(false);
        format.withEmptyLinesIgnored(false);
        
        assertEquals('!', format.getDelimiter());
@@ -47,14 +46,13 @@ public class CSVFormatTest {
        assertEquals('!', format.getEscape());
        assertEquals("\r\n", format.getLineSeparator());
        
-        assertTrue(format.isLeadingSpacesIgnored());
-        assertTrue(format.isTrailingSpacesIgnored());
+        assertTrue(format.isSurroundingSpacesIgnored());
        assertTrue(format.isEmptyLinesIgnored());
    }

    @Test
    public void testMutators() {
-        CSVFormat format = new CSVFormat('!', '!', '!', '!', true, true, true, "\r\n", null);
+        CSVFormat format = new CSVFormat('!', '!', '!', '!', true, true, "\r\n", null);
        
        assertEquals('?', format.withDelimiter('?').getDelimiter());
        assertEquals('?', format.withEncapsulator('?').getEncapsulator());
@@ -62,10 +60,7 @@ public class CSVFormatTest {
        assertEquals("?", format.withLineSeparator("?").getLineSeparator());
        assertEquals('?', format.withEscape('?').getEscape());
        
-        assertFalse(format.withLeadingSpacesIgnored(false).isLeadingSpacesIgnored());
-        assertFalse(format.withTrailingSpacesIgnored(false).isTrailingSpacesIgnored());
-        assertFalse(format.withSurroundingSpacesIgnored(false).isLeadingSpacesIgnored());
-        assertFalse(format.withSurroundingSpacesIgnored(false).isTrailingSpacesIgnored());
+        assertFalse(format.withSurroundingSpacesIgnored(false).isSurroundingSpacesIgnored());
        assertFalse(format.withEmptyLinesIgnored(false).isEmptyLinesIgnored());
    }

@@ -170,8 +165,7 @@ public class CSVFormatTest {
        assertEquals("comment start", CSVFormat.DEFAULT.getCommentStart(), format.getCommentStart());
        assertEquals("line separator", CSVFormat.DEFAULT.getLineSeparator(), format.getLineSeparator());
        assertEquals("escape", CSVFormat.DEFAULT.getEscape(), format.getEscape());
-        assertEquals("trim left", CSVFormat.DEFAULT.isLeadingSpacesIgnored(), format.isLeadingSpacesIgnored());
-        assertEquals("trim right", CSVFormat.DEFAULT.isTrailingSpacesIgnored(), format.isTrailingSpacesIgnored());
+        assertEquals("trim", CSVFormat.DEFAULT.isSurroundingSpacesIgnored(), format.isSurroundingSpacesIgnored());
        assertEquals("empty lines", CSVFormat.DEFAULT.isEmptyLinesIgnored(), format.isEmptyLinesIgnored());
    }
 } 
--- a/src/test/java/org/apache/commons/csv/CSVLexer1.java
+++ b/src/test/java/org/apache/commons/csv/CSVLexer1.java
@@ -82,7 +82,7 @@ class CSVLexer1 extends Lexer {
        //  important: make sure a new char gets consumed in each iteration
        while (!tkn.isReady && tkn.type != EOF) {
            // ignore whitespaces at beginning of a token
-            if (format.isLeadingSpacesIgnored()) {
+            if (format.isSurroundingSpacesIgnored()) {
                while (isWhitespace(c) && !eol) {
                    wsBuf.append((char) c);
                    c = in.read();
@@ -115,7 +115,7 @@ class CSVLexer1 extends Lexer {
            } else {
                // next token must be a simple token
                // add removed blanks when not ignoring whitespace chars...
-                if (!format.isLeadingSpacesIgnored()) {
+                if (!format.isSurroundingSpacesIgnored()) {
                    tkn.content.append(wsBuf);
                }
                simpleTokenLexer(tkn, c);
@@ -167,7 +167,7 @@ class CSVLexer1 extends Lexer {
            c = in.read();
        }

-        if (format.isTrailingSpacesIgnored()) {
+        if (format.isSurroundingSpacesIgnored()) {
            trimTrailingSpaces(tkn.content);
        }

--- a/src/test/java/org/apache/commons/csv/CSVLexerTest.java
+++ b/src/test/java/org/apache/commons/csv/CSVLexerTest.java
@@ -40,7 +40,7 @@ public class CSVLexerTest {
    @Test
    public void testNextToken1() throws IOException {
        String code = "abc,def, hijk,  lmnop,   qrst,uv ,wxy   ,z , ,";
-        CSVLexer parser = getLexer(code, CSVFormat.DEFAULT);
+        CSVLexer parser = getLexer(code, CSVFormat.DEFAULT.withSurroundingSpacesIgnored(true));
        assertTokenEquals(TOKEN, "abc", parser.nextToken(new Token()));
        assertTokenEquals(TOKEN, "def", parser.nextToken(new Token()));
        assertTokenEquals(TOKEN, "hijk", parser.nextToken(new Token()));
@@ -115,7 +115,7 @@ public class CSVLexerTest {
        *        a,  " foo " ,b
        */
        String code = "a,\"foo\",b\na,   \" foo\",b\na,\"foo \"  ,b\na,  \" foo \"  ,b";
-        CSVLexer parser = getLexer(code, CSVFormat.DEFAULT);
+        CSVLexer parser = getLexer(code, CSVFormat.DEFAULT.withSurroundingSpacesIgnored(true));
        assertTokenEquals(TOKEN, "a", parser.nextToken(new Token()));
        assertTokenEquals(TOKEN, "foo", parser.nextToken(new Token()));
        assertTokenEquals(EORECORD, "b", parser.nextToken(new Token()));
--- a/src/test/java/org/apache/commons/csv/CSVParserTest.java
+++ b/src/test/java/org/apache/commons/csv/CSVParserTest.java
@@ -56,9 +56,9 @@ public class CSVParserTest {

    @Test
    public void testGetLine() throws IOException {
-        CSVParser parser = new CSVParser(new StringReader(code));
+        CSVParser parser = new CSVParser(new StringReader(code), CSVFormat.DEFAULT.withSurroundingSpacesIgnored(true));
        for (String[] re : res) {
-            assertTrue(Arrays.equals(re, parser.getRecord().values()));
+            assertTrue("Failed to match: "+Arrays.toString(re), Arrays.equals(re, parser.getRecord().values()));
        }
        
        assertNull(parser.getRecord());
@@ -66,7 +66,7 @@ public class CSVParserTest {

    @Test
    public void testGetRecords() throws IOException {
-        CSVParser parser = new CSVParser(new StringReader(code));
+        CSVParser parser = new CSVParser(new StringReader(code), CSVFormat.DEFAULT.withSurroundingSpacesIgnored(true));
        List<CSVRecord> records = parser.getRecords();
        assertEquals(res.length, records.size());
        assertTrue(records.size() > 0);
@@ -283,7 +283,7 @@ public class CSVParserTest {
        };


-        CSVFormat format = new CSVFormat(',', '\'', CSVFormat.DISABLED, '/', false, false, true, "\r\n", null);
+        CSVFormat format = new CSVFormat(',', '\'', CSVFormat.DISABLED, '/', false, true, "\r\n", null);

        CSVParser parser = new CSVParser(code, format);
        List<CSVRecord> records = parser.getRecords();
@@ -312,7 +312,7 @@ public class CSVParserTest {
        };


-        CSVFormat format = new CSVFormat(',',  CSVFormat.DISABLED,  CSVFormat.DISABLED, '/', false, false, true, "\r\n", null);
+        CSVFormat format = new CSVFormat(',',  CSVFormat.DISABLED,  CSVFormat.DISABLED, '/', false, true, "\r\n", null);

        CSVParser parser = new CSVParser(code, format);
        List<CSVRecord> records = parser.getRecords();