CSV-54 Confusing semantic of the ignore leading/trailing spaces parameters
git-svn-id: https://svn.apache.org/repos/asf/commons/proper/csv/trunk@1305494 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
5063b16c7a
commit
38741a48c6
|
@ -36,8 +36,7 @@ public class CSVFormat implements Serializable {
|
|||
private final char encapsulator;
|
||||
private final char commentStart;
|
||||
private final char escape;
|
||||
private final boolean leadingSpacesIgnored;
|
||||
private final boolean trailingSpacesIgnored;
|
||||
private final boolean surroundingSpacesIgnored; // Should leading/trailing spaces be ignored around values?
|
||||
private final boolean emptyLinesIgnored;
|
||||
private final String lineSeparator; // for outputs
|
||||
private final String[] header;
|
||||
|
@ -55,7 +54,7 @@ public class CSVFormat implements Serializable {
|
|||
* Starting format with no settings defined; used for creating other formats from scratch.
|
||||
*/
|
||||
private static CSVFormat PRISTINE =
|
||||
new CSVFormat(DISABLED, DISABLED, DISABLED, DISABLED, false, false, false, null, null);
|
||||
new CSVFormat(DISABLED, DISABLED, DISABLED, DISABLED, false, false, null, null);
|
||||
|
||||
/**
|
||||
* Standard comma separated format, as for {@link #RFC4180} but allowing blank lines.
|
||||
|
@ -113,8 +112,7 @@ public class CSVFormat implements Serializable {
|
|||
PRISTINE
|
||||
.withDelimiter('\t')
|
||||
.withEncapsulator('"')
|
||||
.withLeadingSpacesIgnored(true)
|
||||
.withTrailingSpacesIgnored(true)
|
||||
.withSurroundingSpacesIgnored(true)
|
||||
.withEmptyLinesIgnored(true)
|
||||
.withLineSeparator(CRLF)
|
||||
;
|
||||
|
@ -142,8 +140,7 @@ public class CSVFormat implements Serializable {
|
|||
* @param encapsulator the char used as value encapsulation marker
|
||||
* @param commentStart the char used for comment identification
|
||||
* @param escape the char used to escape special characters in values
|
||||
* @param leadingSpacesIgnored <tt>true</tt> when leading whitespaces should be ignored
|
||||
* @param trailingSpacesIgnored <tt>true</tt> when trailing whitespaces should be ignored
|
||||
* @param surroundingSpacesIgnored <tt>true</tt> when whitespaces enclosing values should be ignored
|
||||
* @param emptyLinesIgnored <tt>true</tt> when the parser should skip empty lines
|
||||
* @param lineSeparator the line separator to use for output
|
||||
* @param header the header
|
||||
|
@ -153,8 +150,7 @@ public class CSVFormat implements Serializable {
|
|||
char encapsulator,
|
||||
char commentStart,
|
||||
char escape,
|
||||
boolean leadingSpacesIgnored,
|
||||
boolean trailingSpacesIgnored,
|
||||
boolean surroundingSpacesIgnored,
|
||||
boolean emptyLinesIgnored,
|
||||
String lineSeparator,
|
||||
String[] header) {
|
||||
|
@ -162,8 +158,7 @@ public class CSVFormat implements Serializable {
|
|||
this.encapsulator = encapsulator;
|
||||
this.commentStart = commentStart;
|
||||
this.escape = escape;
|
||||
this.leadingSpacesIgnored = leadingSpacesIgnored;
|
||||
this.trailingSpacesIgnored = trailingSpacesIgnored;
|
||||
this.surroundingSpacesIgnored = surroundingSpacesIgnored;
|
||||
this.emptyLinesIgnored = emptyLinesIgnored;
|
||||
this.lineSeparator = lineSeparator;
|
||||
this.header = header;
|
||||
|
@ -226,7 +221,7 @@ public class CSVFormat implements Serializable {
|
|||
throw new IllegalArgumentException("The delimiter cannot be a line break");
|
||||
}
|
||||
|
||||
return new CSVFormat(delimiter, encapsulator, commentStart, escape, leadingSpacesIgnored, trailingSpacesIgnored, emptyLinesIgnored, lineSeparator, header);
|
||||
return new CSVFormat(delimiter, encapsulator, commentStart, escape, surroundingSpacesIgnored, emptyLinesIgnored, lineSeparator, header);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -250,7 +245,7 @@ public class CSVFormat implements Serializable {
|
|||
throw new IllegalArgumentException("The encapsulator cannot be a line break");
|
||||
}
|
||||
|
||||
return new CSVFormat(delimiter, encapsulator, commentStart, escape, leadingSpacesIgnored, trailingSpacesIgnored, emptyLinesIgnored, lineSeparator, header);
|
||||
return new CSVFormat(delimiter, encapsulator, commentStart, escape, surroundingSpacesIgnored, emptyLinesIgnored, lineSeparator, header);
|
||||
}
|
||||
|
||||
boolean isEncapsulating() {
|
||||
|
@ -278,7 +273,7 @@ public class CSVFormat implements Serializable {
|
|||
throw new IllegalArgumentException("The comment start character cannot be a line break");
|
||||
}
|
||||
|
||||
return new CSVFormat(delimiter, encapsulator, commentStart, escape, leadingSpacesIgnored, trailingSpacesIgnored, emptyLinesIgnored, lineSeparator, header);
|
||||
return new CSVFormat(delimiter, encapsulator, commentStart, escape, surroundingSpacesIgnored, emptyLinesIgnored, lineSeparator, header);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -311,7 +306,7 @@ public class CSVFormat implements Serializable {
|
|||
throw new IllegalArgumentException("The escape character cannot be a line break");
|
||||
}
|
||||
|
||||
return new CSVFormat(delimiter, encapsulator, commentStart, escape, leadingSpacesIgnored, trailingSpacesIgnored, emptyLinesIgnored, lineSeparator, header);
|
||||
return new CSVFormat(delimiter, encapsulator, commentStart, escape, surroundingSpacesIgnored, emptyLinesIgnored, lineSeparator, header);
|
||||
}
|
||||
|
||||
boolean isEscaping() {
|
||||
|
@ -319,43 +314,12 @@ public class CSVFormat implements Serializable {
|
|||
}
|
||||
|
||||
/**
|
||||
* Tells if the spaces characters at the beginning of the values are ignored when parsing a file.
|
||||
* Specifies whether spaces around values are ignored when parsing input.
|
||||
*
|
||||
* @return <tt>true</tt> if leading spaces are removed, <tt>false</tt> if they are preserved.
|
||||
* @return <tt>true</tt> if spaces around values are ignored, <tt>false</tt> if they are treated as part of the value.
|
||||
*/
|
||||
public boolean isLeadingSpacesIgnored() {
|
||||
return leadingSpacesIgnored;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a copy of this format with the specified left trimming behavior.
|
||||
*
|
||||
* @param leadingSpacesIgnored the left trimming behavior, <tt>true</tt> to remove the leading spaces,
|
||||
* <tt>false</tt> to leave the spaces as is.
|
||||
* @return A copy of this format with the specified left trimming behavior.
|
||||
*/
|
||||
public CSVFormat withLeadingSpacesIgnored(boolean leadingSpacesIgnored) {
|
||||
return new CSVFormat(delimiter, encapsulator, commentStart, escape, leadingSpacesIgnored, trailingSpacesIgnored, emptyLinesIgnored, lineSeparator, header);
|
||||
}
|
||||
|
||||
/**
|
||||
* Tells if the spaces characters at the end of the values are ignored when parsing a file.
|
||||
*
|
||||
* @return <tt>true</tt> if trailing spaces are removed, <tt>false</tt> if they are preserved.
|
||||
*/
|
||||
public boolean isTrailingSpacesIgnored() {
|
||||
return trailingSpacesIgnored;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a copy of this format with the specified right trimming behavior.
|
||||
*
|
||||
* @param trailingSpacesIgnored the right trimming behavior, <tt>true</tt> to remove the trailing spaces,
|
||||
* <tt>false</tt> to leave the spaces as is.
|
||||
* @return A copy of this format with the specified right trimming behavior.
|
||||
*/
|
||||
public CSVFormat withTrailingSpacesIgnored(boolean trailingSpacesIgnored) {
|
||||
return new CSVFormat(delimiter, encapsulator, commentStart, escape, leadingSpacesIgnored, trailingSpacesIgnored, emptyLinesIgnored, lineSeparator, header);
|
||||
public boolean isSurroundingSpacesIgnored() {
|
||||
return surroundingSpacesIgnored;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -366,7 +330,7 @@ public class CSVFormat implements Serializable {
|
|||
* @return A copy of this format with the specified trimming behavior.
|
||||
*/
|
||||
public CSVFormat withSurroundingSpacesIgnored(boolean surroundingSpacesIgnored) {
|
||||
return new CSVFormat(delimiter, encapsulator, commentStart, escape, surroundingSpacesIgnored, surroundingSpacesIgnored, emptyLinesIgnored, lineSeparator, header);
|
||||
return new CSVFormat(delimiter, encapsulator, commentStart, escape, surroundingSpacesIgnored, emptyLinesIgnored, lineSeparator, header);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -386,7 +350,7 @@ public class CSVFormat implements Serializable {
|
|||
* @return A copy of this format with the specified empty line skipping behavior.
|
||||
*/
|
||||
public CSVFormat withEmptyLinesIgnored(boolean emptyLinesIgnored) {
|
||||
return new CSVFormat(delimiter, encapsulator, commentStart, escape, leadingSpacesIgnored, trailingSpacesIgnored, emptyLinesIgnored, lineSeparator, header);
|
||||
return new CSVFormat(delimiter, encapsulator, commentStart, escape, surroundingSpacesIgnored, emptyLinesIgnored, lineSeparator, header);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -406,7 +370,7 @@ public class CSVFormat implements Serializable {
|
|||
* @return A copy of this format using the specified output line separator
|
||||
*/
|
||||
public CSVFormat withLineSeparator(String lineSeparator) {
|
||||
return new CSVFormat(delimiter, encapsulator, commentStart, escape, leadingSpacesIgnored, trailingSpacesIgnored, emptyLinesIgnored, lineSeparator, header);
|
||||
return new CSVFormat(delimiter, encapsulator, commentStart, escape, surroundingSpacesIgnored, emptyLinesIgnored, lineSeparator, header);
|
||||
}
|
||||
|
||||
String[] getHeader() {
|
||||
|
@ -428,7 +392,7 @@ public class CSVFormat implements Serializable {
|
|||
* @return A copy of this format using the specified header
|
||||
*/
|
||||
public CSVFormat withHeader(String... header) {
|
||||
return new CSVFormat(delimiter, encapsulator, commentStart, escape, leadingSpacesIgnored, trailingSpacesIgnored, emptyLinesIgnored, lineSeparator, header);
|
||||
return new CSVFormat(delimiter, encapsulator, commentStart, escape, surroundingSpacesIgnored, emptyLinesIgnored, lineSeparator, header);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -81,7 +81,7 @@ class CSVLexer extends Lexer {
|
|||
// important: make sure a new char gets consumed in each iteration
|
||||
while (tkn.type == INVALID) {
|
||||
// ignore whitespaces at beginning of a token
|
||||
if (leadingSpacesIgnored) {
|
||||
if (surroundingSpacesIgnored) {
|
||||
while (isWhitespace(c) && !eol) {
|
||||
c = in.read();
|
||||
eol = isEndOfLine(c);
|
||||
|
@ -158,7 +158,7 @@ class CSVLexer extends Lexer {
|
|||
c = in.read();
|
||||
}
|
||||
|
||||
if (trailingSpacesIgnored) {
|
||||
if (surroundingSpacesIgnored) {
|
||||
trimTrailingSpaces(tkn.content);
|
||||
}
|
||||
|
||||
|
|
|
@ -34,8 +34,7 @@ abstract class Lexer {
|
|||
private final char encapsulator;
|
||||
private final char commmentStart;
|
||||
|
||||
final boolean leadingSpacesIgnored;
|
||||
final boolean trailingSpacesIgnored;
|
||||
final boolean surroundingSpacesIgnored;
|
||||
final boolean emptyLinesIgnored;
|
||||
|
||||
final CSVFormat format;
|
||||
|
@ -53,8 +52,7 @@ abstract class Lexer {
|
|||
this.escape = format.getEscape();
|
||||
this.encapsulator = format.getEncapsulator();
|
||||
this.commmentStart = format.getCommentStart();
|
||||
this.leadingSpacesIgnored = format.isLeadingSpacesIgnored();
|
||||
this.trailingSpacesIgnored = format.isTrailingSpacesIgnored();
|
||||
this.surroundingSpacesIgnored = format.isSurroundingSpacesIgnored();
|
||||
this.emptyLinesIgnored = format.isEmptyLinesIgnored();
|
||||
}
|
||||
|
||||
|
|
|
@ -30,15 +30,14 @@ public class CSVFormatTest {
|
|||
|
||||
@Test
|
||||
public void testImmutalibity() {
|
||||
CSVFormat format = new CSVFormat('!', '!', '!', '!', true, true, true, "\r\n", null);
|
||||
CSVFormat format = new CSVFormat('!', '!', '!', '!', true, true, "\r\n", null);
|
||||
|
||||
format.withDelimiter('?');
|
||||
format.withEncapsulator('?');
|
||||
format.withCommentStart('?');
|
||||
format.withLineSeparator("?");
|
||||
format.withEscape('?');
|
||||
format.withLeadingSpacesIgnored(false);
|
||||
format.withTrailingSpacesIgnored(false);
|
||||
format.withSurroundingSpacesIgnored(false);
|
||||
format.withEmptyLinesIgnored(false);
|
||||
|
||||
assertEquals('!', format.getDelimiter());
|
||||
|
@ -47,14 +46,13 @@ public class CSVFormatTest {
|
|||
assertEquals('!', format.getEscape());
|
||||
assertEquals("\r\n", format.getLineSeparator());
|
||||
|
||||
assertTrue(format.isLeadingSpacesIgnored());
|
||||
assertTrue(format.isTrailingSpacesIgnored());
|
||||
assertTrue(format.isSurroundingSpacesIgnored());
|
||||
assertTrue(format.isEmptyLinesIgnored());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testMutators() {
|
||||
CSVFormat format = new CSVFormat('!', '!', '!', '!', true, true, true, "\r\n", null);
|
||||
CSVFormat format = new CSVFormat('!', '!', '!', '!', true, true, "\r\n", null);
|
||||
|
||||
assertEquals('?', format.withDelimiter('?').getDelimiter());
|
||||
assertEquals('?', format.withEncapsulator('?').getEncapsulator());
|
||||
|
@ -62,10 +60,7 @@ public class CSVFormatTest {
|
|||
assertEquals("?", format.withLineSeparator("?").getLineSeparator());
|
||||
assertEquals('?', format.withEscape('?').getEscape());
|
||||
|
||||
assertFalse(format.withLeadingSpacesIgnored(false).isLeadingSpacesIgnored());
|
||||
assertFalse(format.withTrailingSpacesIgnored(false).isTrailingSpacesIgnored());
|
||||
assertFalse(format.withSurroundingSpacesIgnored(false).isLeadingSpacesIgnored());
|
||||
assertFalse(format.withSurroundingSpacesIgnored(false).isTrailingSpacesIgnored());
|
||||
assertFalse(format.withSurroundingSpacesIgnored(false).isSurroundingSpacesIgnored());
|
||||
assertFalse(format.withEmptyLinesIgnored(false).isEmptyLinesIgnored());
|
||||
}
|
||||
|
||||
|
@ -170,8 +165,7 @@ public class CSVFormatTest {
|
|||
assertEquals("comment start", CSVFormat.DEFAULT.getCommentStart(), format.getCommentStart());
|
||||
assertEquals("line separator", CSVFormat.DEFAULT.getLineSeparator(), format.getLineSeparator());
|
||||
assertEquals("escape", CSVFormat.DEFAULT.getEscape(), format.getEscape());
|
||||
assertEquals("trim left", CSVFormat.DEFAULT.isLeadingSpacesIgnored(), format.isLeadingSpacesIgnored());
|
||||
assertEquals("trim right", CSVFormat.DEFAULT.isTrailingSpacesIgnored(), format.isTrailingSpacesIgnored());
|
||||
assertEquals("trim", CSVFormat.DEFAULT.isSurroundingSpacesIgnored(), format.isSurroundingSpacesIgnored());
|
||||
assertEquals("empty lines", CSVFormat.DEFAULT.isEmptyLinesIgnored(), format.isEmptyLinesIgnored());
|
||||
}
|
||||
}
|
||||
|
|
|
@ -82,7 +82,7 @@ class CSVLexer1 extends Lexer {
|
|||
// important: make sure a new char gets consumed in each iteration
|
||||
while (!tkn.isReady && tkn.type != EOF) {
|
||||
// ignore whitespaces at beginning of a token
|
||||
if (format.isLeadingSpacesIgnored()) {
|
||||
if (format.isSurroundingSpacesIgnored()) {
|
||||
while (isWhitespace(c) && !eol) {
|
||||
wsBuf.append((char) c);
|
||||
c = in.read();
|
||||
|
@ -115,7 +115,7 @@ class CSVLexer1 extends Lexer {
|
|||
} else {
|
||||
// next token must be a simple token
|
||||
// add removed blanks when not ignoring whitespace chars...
|
||||
if (!format.isLeadingSpacesIgnored()) {
|
||||
if (!format.isSurroundingSpacesIgnored()) {
|
||||
tkn.content.append(wsBuf);
|
||||
}
|
||||
simpleTokenLexer(tkn, c);
|
||||
|
@ -167,7 +167,7 @@ class CSVLexer1 extends Lexer {
|
|||
c = in.read();
|
||||
}
|
||||
|
||||
if (format.isTrailingSpacesIgnored()) {
|
||||
if (format.isSurroundingSpacesIgnored()) {
|
||||
trimTrailingSpaces(tkn.content);
|
||||
}
|
||||
|
||||
|
|
|
@ -40,7 +40,7 @@ public class CSVLexerTest {
|
|||
@Test
|
||||
public void testNextToken1() throws IOException {
|
||||
String code = "abc,def, hijk, lmnop, qrst,uv ,wxy ,z , ,";
|
||||
CSVLexer parser = getLexer(code, CSVFormat.DEFAULT);
|
||||
CSVLexer parser = getLexer(code, CSVFormat.DEFAULT.withSurroundingSpacesIgnored(true));
|
||||
assertTokenEquals(TOKEN, "abc", parser.nextToken(new Token()));
|
||||
assertTokenEquals(TOKEN, "def", parser.nextToken(new Token()));
|
||||
assertTokenEquals(TOKEN, "hijk", parser.nextToken(new Token()));
|
||||
|
@ -115,7 +115,7 @@ public class CSVLexerTest {
|
|||
* a, " foo " ,b
|
||||
*/
|
||||
String code = "a,\"foo\",b\na, \" foo\",b\na,\"foo \" ,b\na, \" foo \" ,b";
|
||||
CSVLexer parser = getLexer(code, CSVFormat.DEFAULT);
|
||||
CSVLexer parser = getLexer(code, CSVFormat.DEFAULT.withSurroundingSpacesIgnored(true));
|
||||
assertTokenEquals(TOKEN, "a", parser.nextToken(new Token()));
|
||||
assertTokenEquals(TOKEN, "foo", parser.nextToken(new Token()));
|
||||
assertTokenEquals(EORECORD, "b", parser.nextToken(new Token()));
|
||||
|
|
|
@ -56,9 +56,9 @@ public class CSVParserTest {
|
|||
|
||||
@Test
|
||||
public void testGetLine() throws IOException {
|
||||
CSVParser parser = new CSVParser(new StringReader(code));
|
||||
CSVParser parser = new CSVParser(new StringReader(code), CSVFormat.DEFAULT.withSurroundingSpacesIgnored(true));
|
||||
for (String[] re : res) {
|
||||
assertTrue(Arrays.equals(re, parser.getRecord().values()));
|
||||
assertTrue("Failed to match: "+Arrays.toString(re), Arrays.equals(re, parser.getRecord().values()));
|
||||
}
|
||||
|
||||
assertNull(parser.getRecord());
|
||||
|
@ -66,7 +66,7 @@ public class CSVParserTest {
|
|||
|
||||
@Test
|
||||
public void testGetRecords() throws IOException {
|
||||
CSVParser parser = new CSVParser(new StringReader(code));
|
||||
CSVParser parser = new CSVParser(new StringReader(code), CSVFormat.DEFAULT.withSurroundingSpacesIgnored(true));
|
||||
List<CSVRecord> records = parser.getRecords();
|
||||
assertEquals(res.length, records.size());
|
||||
assertTrue(records.size() > 0);
|
||||
|
@ -283,7 +283,7 @@ public class CSVParserTest {
|
|||
};
|
||||
|
||||
|
||||
CSVFormat format = new CSVFormat(',', '\'', CSVFormat.DISABLED, '/', false, false, true, "\r\n", null);
|
||||
CSVFormat format = new CSVFormat(',', '\'', CSVFormat.DISABLED, '/', false, true, "\r\n", null);
|
||||
|
||||
CSVParser parser = new CSVParser(code, format);
|
||||
List<CSVRecord> records = parser.getRecords();
|
||||
|
@ -312,7 +312,7 @@ public class CSVParserTest {
|
|||
};
|
||||
|
||||
|
||||
CSVFormat format = new CSVFormat(',', CSVFormat.DISABLED, CSVFormat.DISABLED, '/', false, false, true, "\r\n", null);
|
||||
CSVFormat format = new CSVFormat(',', CSVFormat.DISABLED, CSVFormat.DISABLED, '/', false, true, "\r\n", null);
|
||||
|
||||
CSVParser parser = new CSVParser(code, format);
|
||||
List<CSVRecord> records = parser.getRecords();
|
||||
|
|
Loading…
Reference in New Issue