Extracted the strategy concept into its own class
git-svn-id: https://svn.apache.org/repos/asf/jakarta/commons/sandbox/csv/trunk@399987 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
f047581f95
commit
eac54a225b
|
@ -65,16 +65,10 @@ public class CSVParser {
|
|||
/** Token with content when end of a line is reached. */
|
||||
protected static final int TT_EORECORD = 2;
|
||||
|
||||
// the csv definition
|
||||
private char delimiter;
|
||||
private char encapsulator;
|
||||
private char commentStart;
|
||||
private boolean ignoreLeadingWhitespaces;
|
||||
private boolean interpretUnicodeEscapes;
|
||||
private boolean ignoreEmptyLines;
|
||||
|
||||
// the input stream
|
||||
private ExtendedBufferedReader in;
|
||||
|
||||
private CSVStrategy strategy;
|
||||
|
||||
/**
|
||||
* Token is an internal token representation.
|
||||
|
@ -106,7 +100,7 @@ public class CSVParser {
|
|||
* @param s CSV String to be parsed.
|
||||
* @return parsed String matrix (which is never null)
|
||||
* @throws IOException in case of error
|
||||
* @see #setCSVStrategy()
|
||||
* @see #setStrategy(CSVStrategy)
|
||||
*/
|
||||
public static String[][] parse(String s) throws IOException {
|
||||
if (s == null) {
|
||||
|
@ -130,7 +124,7 @@ public class CSVParser {
|
|||
* @param s CSV String to be parsed.
|
||||
* @return parsed String vector (which is never null)
|
||||
* @throws IOException in case of error
|
||||
* @see #setCSVStrategy()
|
||||
* @see #setStrategy(CSVStrategy)
|
||||
*/
|
||||
public static String[] parseLine(String s) throws IOException {
|
||||
if (s == null) {
|
||||
|
@ -151,7 +145,7 @@ public class CSVParser {
|
|||
* Default strategy for the parser follows the default CSV Strategy.
|
||||
*
|
||||
* @param input an InputStream containing "csv-formatted" stream
|
||||
* @see #setCSVStrategy()
|
||||
* @see #setStrategy(CSVStrategy)
|
||||
*/
|
||||
public CSVParser(InputStream input) {
|
||||
this(new InputStreamReader(input));
|
||||
|
@ -161,7 +155,7 @@ public class CSVParser {
|
|||
* Default strategy for the parser follows the default CSV Strategy.
|
||||
*
|
||||
* @param input a Reader based on "csv-formatted" input
|
||||
* @see #setCSVStrategy()
|
||||
* @see #setStrategy(CSVStrategy)
|
||||
*/
|
||||
public CSVParser(Reader input) {
|
||||
// note: must match default-CSV-strategy !!
|
||||
|
@ -172,7 +166,7 @@ public class CSVParser {
|
|||
* Customized value delimiter parser.
|
||||
*
|
||||
* The parser follows the default CSV strategy as defined in
|
||||
* {@link #setCSVStrategy()} except for the delimiter setting.
|
||||
* {@link CSVStrategy#DEFAULT_STRATEGY} except for the delimiter setting.
|
||||
*
|
||||
* @param input a Reader based on "csv-formatted" input
|
||||
* @param delimiter a Char used for value separation
|
||||
|
@ -193,18 +187,9 @@ public class CSVParser {
|
|||
* @param encapsulator a Char used as value encapsulation marker
|
||||
* @param commentStart a Char used for comment identification
|
||||
*/
|
||||
public CSVParser(
|
||||
Reader input,
|
||||
char delimiter,
|
||||
char encapsulator,
|
||||
char commentStart) {
|
||||
public CSVParser(Reader input, char delimiter, char encapsulator, char commentStart) {
|
||||
this.in = new ExtendedBufferedReader(input);
|
||||
this.setDelimiter(delimiter);
|
||||
this.setEncapsulator(encapsulator);
|
||||
this.setCommentStart(commentStart);
|
||||
this.setIgnoreLeadingWhitespaces(true);
|
||||
this.setUnicodeEscapeInterpretation(false);
|
||||
this.setIgnoreEmptyLines(true);
|
||||
this.strategy = new CSVStrategy(delimiter, encapsulator, commentStart);
|
||||
}
|
||||
|
||||
// ======================================================
|
||||
|
@ -350,7 +335,7 @@ public class CSVParser {
|
|||
c = in.readAgain();
|
||||
|
||||
// empty line detection: eol AND (last char was EOL or beginning)
|
||||
while (ignoreEmptyLines && eol
|
||||
while (strategy.getIgnoreEmptyLines() && eol
|
||||
&& (lastChar == '\n'
|
||||
|| lastChar == ExtendedBufferedReader.UNDEFINED)
|
||||
&& !isEndOfFile(lastChar)) {
|
||||
|
@ -367,7 +352,7 @@ public class CSVParser {
|
|||
}
|
||||
|
||||
// did we reach eof during the last iteration already ? TT_EOF
|
||||
if (isEndOfFile(lastChar) || (lastChar != delimiter && isEndOfFile(c))) {
|
||||
if (isEndOfFile(lastChar) || (lastChar != strategy.getDelimiter() && isEndOfFile(c))) {
|
||||
tkn.type = TT_EOF;
|
||||
return tkn;
|
||||
}
|
||||
|
@ -381,11 +366,11 @@ public class CSVParser {
|
|||
eol = isEndOfLine(c);
|
||||
}
|
||||
// ok, start of token reached: comment, encapsulated, or token
|
||||
if (c == commentStart) {
|
||||
if (c == strategy.getCommentStart()) {
|
||||
// ignore everything till end of line and continue (incr linecount)
|
||||
in.readLine();
|
||||
tkn = nextToken();
|
||||
} else if (c == delimiter) {
|
||||
} else if (c == strategy.getDelimiter()) {
|
||||
// empty token return TT_TOKEN("")
|
||||
tkn.type = TT_TOKEN;
|
||||
tkn.isReady = true;
|
||||
|
@ -394,7 +379,7 @@ public class CSVParser {
|
|||
tkn.content.append("");
|
||||
tkn.type = TT_EORECORD;
|
||||
tkn.isReady = true;
|
||||
} else if (c == encapsulator) {
|
||||
} else if (c == strategy.getEncapsulator()) {
|
||||
// consume encapsulated token
|
||||
encapsulatedTokenLexer(tkn, c);
|
||||
} else if (isEndOfFile(c)) {
|
||||
|
@ -405,7 +390,7 @@ public class CSVParser {
|
|||
} else {
|
||||
// next token must be a simple token
|
||||
// add removed blanks when not ignoring whitespace chars...
|
||||
if (!this.ignoreLeadingWhitespaces) {
|
||||
if (!strategy.getIgnoreLeadingWhitespaces()) {
|
||||
tkn.content.append(wsBuf.toString());
|
||||
}
|
||||
simpleTokenLexer(tkn, c);
|
||||
|
@ -443,11 +428,11 @@ public class CSVParser {
|
|||
// end of file
|
||||
tkn.type = TT_EOF;
|
||||
tkn.isReady = true;
|
||||
} else if (c == delimiter) {
|
||||
} else if (c == strategy.getDelimiter()) {
|
||||
// end of token
|
||||
tkn.type = TT_TOKEN;
|
||||
tkn.isReady = true;
|
||||
} else if (c == '\\' && interpretUnicodeEscapes && in.lookAhead() == 'u') {
|
||||
} else if (c == '\\' && strategy.getUnicodeEscapeInterpretation() && in.lookAhead() == 'u') {
|
||||
// interpret unicode escaped chars (like \u0070 -> p)
|
||||
tkn.content.append((char) unicodeEscapeLexer(c));
|
||||
} else if (isWhitespace(c)) {
|
||||
|
@ -493,9 +478,9 @@ public class CSVParser {
|
|||
// assert c == delimiter;
|
||||
c = in.read();
|
||||
while (!tkn.isReady) {
|
||||
if (c == encapsulator || c == '\\') {
|
||||
if (c == strategy.getEncapsulator() || c == '\\') {
|
||||
// check lookahead
|
||||
if (in.lookAhead() == encapsulator) {
|
||||
if (in.lookAhead() == strategy.getEncapsulator()) {
|
||||
// double or escaped encapsulator -> add single encapsulator to token
|
||||
c = in.read();
|
||||
tkn.content.append((char) c);
|
||||
|
@ -506,7 +491,7 @@ public class CSVParser {
|
|||
c = in.read();
|
||||
tkn.content.append((char) c);
|
||||
} else if (
|
||||
interpretUnicodeEscapes
|
||||
strategy.getUnicodeEscapeInterpretation()
|
||||
&& c == '\\'
|
||||
&& in.lookAhead() == 'u') {
|
||||
// interpret unicode escaped chars (like \u0070 -> p)
|
||||
|
@ -518,7 +503,7 @@ public class CSVParser {
|
|||
// token finish mark (encapsulator) reached: ignore whitespace till delimiter
|
||||
while (!tkn.isReady) {
|
||||
int n = in.lookAhead();
|
||||
if (n == delimiter) {
|
||||
if (n == strategy.getDelimiter()) {
|
||||
tkn.type = TT_TOKEN;
|
||||
tkn.isReady = true;
|
||||
} else if (isEndOfFile(n)) {
|
||||
|
@ -589,201 +574,26 @@ public class CSVParser {
|
|||
}
|
||||
|
||||
// ======================================================
|
||||
// strategy utilities
|
||||
// strategies
|
||||
// ======================================================
|
||||
|
||||
/**
|
||||
* Sets the "Default CSV" settings.
|
||||
*
|
||||
* The default csv settings are relatively restrictive but implement
|
||||
* something like the "least-common-basis" of CSV:
|
||||
* <ul>
|
||||
* <li> Delimiter of values is comma ',' (as the C in "CSV") </li>
|
||||
* <li> Complex values encapsulated by '"' </li>
|
||||
* <li> Comments are not supported </li>
|
||||
* <li> Leading whitespaces are ignored </li>
|
||||
* <li> Unicode escapes are not interpreted </li>
|
||||
* <li> empty lines are skiped </li>
|
||||
* </ul>
|
||||
* @return current instance of CSVParser to allow chained method calls
|
||||
*/
|
||||
public CSVParser setCSVStrategy() {
|
||||
setStrategy(',', '"', (char) 0, true, false, true);
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets the "Excel CSV" settings. There are companies out there which
|
||||
* interpret "C" as an abbreviation for "Semicolon". For these companies the
|
||||
* following settings might be appropriate:
|
||||
* <ul>
|
||||
* <li> Delimiter of values is semicolon ';' </li>
|
||||
* <li> Complex values encapsulated by '"' </li>
|
||||
* <li> Comments are not supported </li>
|
||||
* <li> Leading whitespaces are not ignored </li>
|
||||
* <li> Unicode escapes are not interpreted </li>
|
||||
* <li> empty lines are not skiped </li>
|
||||
* </ul>
|
||||
* Sets the specified CSV Strategy
|
||||
*
|
||||
* @return current instance of CSVParser to allow chained method calls
|
||||
*/
|
||||
public CSVParser setExcelStrategy() {
|
||||
setStrategy(';', '"', (char) 0, false, false, false);
|
||||
public CSVParser setStrategy(CSVStrategy strategy) {
|
||||
this.strategy = strategy;
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Customized CSV strategy setter.
|
||||
* Obtains the CSV Strategy currently in use
|
||||
*
|
||||
* @param delimiter a Char used for value separation
|
||||
* @param encapsulator a Char used as value encapsulation marker
|
||||
* @param commentStart a Char used for comment identification
|
||||
* @param ignoreLeadingWhitespace TRUE when leading whitespaces should be
|
||||
* ignored
|
||||
* @param interpretUnicodeEscapes TRUE when unicode escapes should be
|
||||
* interpreted
|
||||
* @param ignoreEmptyLines TRUE when the parser should skip emtpy lines
|
||||
* @return current instance of CSVParser to allow chained method calls
|
||||
* @return strategy currently being used
|
||||
*/
|
||||
public CSVParser setStrategy(
|
||||
char delimiter,
|
||||
char encapsulator,
|
||||
char commentStart,
|
||||
boolean ignoreLeadingWhitespace,
|
||||
boolean interpretUnicodeEscapes,
|
||||
boolean ignoreEmptyLines) {
|
||||
this.setDelimiter(delimiter);
|
||||
this.setEncapsulator(encapsulator);
|
||||
this.setCommentStart(commentStart);
|
||||
this.setIgnoreLeadingWhitespaces(ignoreLeadingWhitespace);
|
||||
this.setUnicodeEscapeInterpretation(interpretUnicodeEscapes);
|
||||
this.setIgnoreEmptyLines(ignoreEmptyLines);
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Set the desired delimiter.
|
||||
*
|
||||
* @param c a Char used for value separation
|
||||
* @return current instance of CSVParser to allow chained method calls
|
||||
*/
|
||||
public CSVParser setDelimiter(char c) {
|
||||
this.delimiter = c;
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the delimiter.
|
||||
*
|
||||
* @return the delimiter character
|
||||
*/
|
||||
public char getDelimiter() {
|
||||
return this.delimiter;
|
||||
}
|
||||
|
||||
/**
|
||||
* Set the desired encapsulator.
|
||||
*
|
||||
* @param c a Char used as value encapsulation marker
|
||||
* @return current instance of CSVParser to allow chained method calls
|
||||
*/
|
||||
public CSVParser setEncapsulator(char c) {
|
||||
this.encapsulator = c;
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the encapsulator character.
|
||||
*
|
||||
* @return the encapsulator marker
|
||||
*/
|
||||
public char getEncapsulator() {
|
||||
return this.encapsulator;
|
||||
}
|
||||
|
||||
/**
|
||||
* Set the desired comment start character.
|
||||
*
|
||||
* @param c a Char used for comment identification
|
||||
* @return current instance of CSVParser to allow chained method calls
|
||||
*/
|
||||
public CSVParser setCommentStart(char c) {
|
||||
this.commentStart = c;
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the comment identifier.
|
||||
*
|
||||
* @return the comment identifier character
|
||||
*/
|
||||
public char getCommentStart() {
|
||||
return this.commentStart;
|
||||
}
|
||||
|
||||
/**
|
||||
* Enables unicode escape interpretation.
|
||||
*
|
||||
* @param b TRUE when interpretation should be enabled
|
||||
* @return current instance of CSVParser to allow chained method calls
|
||||
*/
|
||||
public CSVParser setUnicodeEscapeInterpretation(boolean b) {
|
||||
this.interpretUnicodeEscapes = b;
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Shows wether unicode interpretation is enabled.
|
||||
*
|
||||
* @return TRUE when unicode interpretation is enabled
|
||||
*/
|
||||
public boolean getUnicodeEscapeInterpretation() {
|
||||
return this.interpretUnicodeEscapes;
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets the ignore-leading-whitespaces behaviour.
|
||||
*
|
||||
* Should the lexer ignore leading whitespaces when parsing non
|
||||
* encapsulated tokens.
|
||||
*
|
||||
* @param b TRUE when leading whitespaces should be ignored
|
||||
* @return current instance of CSVParser to allow chained method calls
|
||||
*/
|
||||
public CSVParser setIgnoreLeadingWhitespaces(boolean b) {
|
||||
this.ignoreLeadingWhitespaces = b;
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Shows whether unicode interpretation is enabled.
|
||||
*
|
||||
* @return TRUE when unicode interpretation is enabled
|
||||
*/
|
||||
public boolean getIgnoreLeadingWhitespaces() {
|
||||
return this.ignoreLeadingWhitespaces;
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets the ignore-empty-line behaviour.
|
||||
*
|
||||
* When set to 'true' empty lines in the input will be ignored.
|
||||
*
|
||||
* @param b TRUE when empty lines in the input should be ignored
|
||||
* @return current instance of CSVParser to allow chained method calls
|
||||
*/
|
||||
public CSVParser setIgnoreEmptyLines(boolean b) {
|
||||
this.ignoreEmptyLines = b;
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Shows whether empty lines in the input are ignored.
|
||||
*
|
||||
* @return TRUE when empty lines in the input are ignored
|
||||
*/
|
||||
public boolean getIgnoreEmptyLines() {
|
||||
return this.ignoreEmptyLines;
|
||||
public CSVStrategy getStrategy() {
|
||||
return this.strategy;
|
||||
}
|
||||
|
||||
// ======================================================
|
||||
|
|
|
@ -0,0 +1,87 @@
|
|||
/*
|
||||
* Copyright 2005 The Apache Software Foundation.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.commons.csv;
|
||||
|
||||
/**
 * CSVStrategy
 *
 * Represents the strategy for a CSV: the delimiter, encapsulator and
 * comment-start characters together with three parsing flags (ignore leading
 * whitespaces, interpret unicode escapes, ignore empty lines).
 *
 * Instances are mutable through their setters. {@link #DEFAULT_STRATEGY} and
 * {@link #EXCEL_STRATEGY} are shared instances: calling a setter on either of
 * them changes the settings seen by every user of that constant.
 */
public class CSVStrategy {

    private char delimiter;
    private char encapsulator;
    private char commentStart;
    private boolean ignoreLeadingWhitespaces;
    private boolean interpretUnicodeEscapes;
    private boolean ignoreEmptyLines;

    /**
     * Comma delimiter, '"' encapsulator, (char) 0 as comment start; leading
     * whitespaces ignored, unicode escapes not interpreted, empty lines ignored.
     */
    public static final CSVStrategy DEFAULT_STRATEGY =
            new CSVStrategy(',', '"', (char) 0, true, false, true);

    /**
     * Semicolon delimiter, '"' encapsulator, (char) 0 as comment start; leading
     * whitespaces kept, unicode escapes not interpreted, empty lines kept.
     */
    public static final CSVStrategy EXCEL_STRATEGY =
            new CSVStrategy(';', '"', (char) 0, false, false, false);

    /**
     * Creates a strategy with the given characters and the same flag values as
     * {@link #DEFAULT_STRATEGY}: leading whitespaces ignored, unicode escapes
     * not interpreted, empty lines ignored.
     *
     * @param delimiter a Char used for value separation
     * @param encapsulator a Char used as value encapsulation marker
     * @param commentStart a Char used for comment identification
     */
    public CSVStrategy(char delimiter, char encapsulator, char commentStart) {
        this(delimiter, encapsulator, commentStart, true, false, true);
    }

    /**
     * Creates a fully customized CSV strategy.
     *
     * @param delimiter a Char used for value separation
     * @param encapsulator a Char used as value encapsulation marker
     * @param commentStart a Char used for comment identification
     * @param ignoreLeadingWhitespace TRUE when leading whitespaces should be
     *                                ignored
     * @param interpretUnicodeEscapes TRUE when unicode escapes should be
     *                                interpreted
     * @param ignoreEmptyLines TRUE when the parser should skip empty lines
     */
    public CSVStrategy(
            char delimiter,
            char encapsulator,
            char commentStart,
            boolean ignoreLeadingWhitespace,
            boolean interpretUnicodeEscapes,
            boolean ignoreEmptyLines) {
        // Assign the fields directly instead of going through the public
        // setters: those are overridable, and a subclass override would
        // otherwise run before the subclass itself has been initialized.
        this.delimiter = delimiter;
        this.encapsulator = encapsulator;
        this.commentStart = commentStart;
        this.ignoreLeadingWhitespaces = ignoreLeadingWhitespace;
        this.interpretUnicodeEscapes = interpretUnicodeEscapes;
        this.ignoreEmptyLines = ignoreEmptyLines;
    }

    /** @param delimiter a Char used for value separation */
    public void setDelimiter(char delimiter) {
        this.delimiter = delimiter;
    }

    /** @return the delimiter character */
    public char getDelimiter() {
        return this.delimiter;
    }

    /** @param encapsulator a Char used as value encapsulation marker */
    public void setEncapsulator(char encapsulator) {
        this.encapsulator = encapsulator;
    }

    /** @return the encapsulator marker */
    public char getEncapsulator() {
        return this.encapsulator;
    }

    /** @param commentStart a Char used for comment identification */
    public void setCommentStart(char commentStart) {
        this.commentStart = commentStart;
    }

    /** @return the comment identifier character */
    public char getCommentStart() {
        return this.commentStart;
    }

    /** @param ignoreLeadingWhitespaces TRUE when leading whitespaces should be ignored */
    public void setIgnoreLeadingWhitespaces(boolean ignoreLeadingWhitespaces) {
        this.ignoreLeadingWhitespaces = ignoreLeadingWhitespaces;
    }

    /** @return TRUE when leading whitespaces are ignored */
    public boolean getIgnoreLeadingWhitespaces() {
        return this.ignoreLeadingWhitespaces;
    }

    /** @param interpretUnicodeEscapes TRUE when unicode escapes should be interpreted */
    public void setUnicodeEscapeInterpretation(boolean interpretUnicodeEscapes) {
        this.interpretUnicodeEscapes = interpretUnicodeEscapes;
    }

    /** @return TRUE when unicode escape interpretation is enabled */
    public boolean getUnicodeEscapeInterpretation() {
        return this.interpretUnicodeEscapes;
    }

    /** @param ignoreEmptyLines TRUE when empty lines in the input should be ignored */
    public void setIgnoreEmptyLines(boolean ignoreEmptyLines) {
        this.ignoreEmptyLines = ignoreEmptyLines;
    }

    /** @return TRUE when empty lines in the input are ignored */
    public boolean getIgnoreEmptyLines() {
        return this.ignoreEmptyLines;
    }

}
|
|
@ -77,67 +77,6 @@ public class CSVParserTest extends TestCase {
|
|||
}
|
||||
|
||||
|
||||
// ======================================================
|
||||
// getters / setters
|
||||
// ======================================================
|
||||
public void testGetSetCommentStart() {
|
||||
CSVParser parser = new CSVParser(new StringReader("hello world"));
|
||||
parser.setCommentStart('#');
|
||||
assertEquals(parser.getCommentStart(), '#');
|
||||
parser.setCommentStart('!');
|
||||
assertEquals(parser.getCommentStart(), '!');
|
||||
}
|
||||
|
||||
public void testGetSetEncapsulator() {
|
||||
CSVParser parser = new CSVParser(new StringReader("hello world"));
|
||||
parser.setEncapsulator('"');
|
||||
assertEquals(parser.getEncapsulator(), '"');
|
||||
parser.setEncapsulator('\'');
|
||||
assertEquals(parser.getEncapsulator(), '\'');
|
||||
}
|
||||
|
||||
public void testGetSetDelimiter() {
|
||||
CSVParser parser = new CSVParser(new StringReader("hello world"));
|
||||
parser.setDelimiter(';');
|
||||
assertEquals(parser.getDelimiter(), ';');
|
||||
parser.setDelimiter(',');
|
||||
assertEquals(parser.getDelimiter(), ',');
|
||||
parser.setDelimiter('\t');
|
||||
assertEquals(parser.getDelimiter(), '\t');
|
||||
}
|
||||
|
||||
public void testSetCSVStrategy() {
|
||||
CSVParser parser = new CSVParser(new StringReader("hello world"));
|
||||
// default settings
|
||||
assertEquals(parser.getDelimiter(), ',');
|
||||
assertEquals(parser.getEncapsulator(), '"');
|
||||
assertEquals(parser.getCommentStart(), '\0');
|
||||
assertEquals(true, parser.getIgnoreLeadingWhitespaces());
|
||||
assertEquals(false, parser.getUnicodeEscapeInterpretation());
|
||||
assertEquals(true, parser.getIgnoreEmptyLines());
|
||||
// explicit csv settings
|
||||
parser.setCSVStrategy();
|
||||
assertEquals(parser.getDelimiter(), ',');
|
||||
assertEquals(parser.getEncapsulator(), '"');
|
||||
assertEquals(parser.getCommentStart(), '\0');
|
||||
assertEquals(true, parser.getIgnoreLeadingWhitespaces());
|
||||
assertEquals(false, parser.getUnicodeEscapeInterpretation());
|
||||
assertEquals(true, parser.getIgnoreEmptyLines());
|
||||
}
|
||||
|
||||
public void testSetExcelStrategy() {
|
||||
CSVParser parser = new CSVParser(new StringReader("hello world"));
|
||||
// explicit Excel settings
|
||||
parser.setExcelStrategy();
|
||||
assertEquals(parser.getDelimiter(), ';');
|
||||
assertEquals(parser.getEncapsulator(), '"');
|
||||
assertEquals(parser.getCommentStart(), '\0');
|
||||
assertEquals(false, parser.getIgnoreLeadingWhitespaces());
|
||||
assertEquals(false, parser.getUnicodeEscapeInterpretation());
|
||||
assertEquals(false, parser.getIgnoreEmptyLines());
|
||||
}
|
||||
|
||||
|
||||
// ======================================================
|
||||
// lexer tests
|
||||
// ======================================================
|
||||
|
@ -146,7 +85,7 @@ public class CSVParserTest extends TestCase {
|
|||
public void testNextToken1() throws IOException {
|
||||
String code = "abc,def, hijk, lmnop, qrst,uv ,wxy ,z , ,";
|
||||
TestCSVParser parser = new TestCSVParser(new StringReader(code));
|
||||
parser.setCSVStrategy();
|
||||
parser.setStrategy(CSVStrategy.DEFAULT_STRATEGY);
|
||||
System.out.println("---------\n" + code + "\n-------------");
|
||||
assertEquals(CSVParser.TT_TOKEN + ";abc;", parser.testNextToken());
|
||||
assertEquals(CSVParser.TT_TOKEN + ";def;", parser.testNextToken());
|
||||
|
@ -171,9 +110,9 @@ public class CSVParserTest extends TestCase {
|
|||
*/
|
||||
String code = "1,2,3,\na,b x,c\n#foo\n\nd,e,\n\n";
|
||||
TestCSVParser parser = new TestCSVParser(new StringReader(code));
|
||||
parser.setIgnoreEmptyLines(false);
|
||||
parser.setCSVStrategy();
|
||||
parser.setCommentStart('#');
|
||||
parser.getStrategy().setIgnoreEmptyLines(false);
|
||||
parser.setStrategy(CSVStrategy.DEFAULT_STRATEGY);
|
||||
parser.getStrategy().setCommentStart('#');
|
||||
System.out.println("---------\n" + code + "\n-------------");
|
||||
assertEquals(CSVParser.TT_TOKEN + ";1;", parser.testNextToken());
|
||||
assertEquals(CSVParser.TT_TOKEN + ";2;", parser.testNextToken());
|
||||
|
@ -198,8 +137,8 @@ public class CSVParserTest extends TestCase {
|
|||
*/
|
||||
String code = "a,\\,,b\n\\,,";
|
||||
TestCSVParser parser = new TestCSVParser(new StringReader(code));
|
||||
parser.setCSVStrategy();
|
||||
parser.setCommentStart('#');
|
||||
parser.setStrategy(CSVStrategy.DEFAULT_STRATEGY);
|
||||
parser.getStrategy().setCommentStart('#');
|
||||
System.out.println("---------\n" + code + "\n-------------");
|
||||
assertEquals(CSVParser.TT_TOKEN + ";a;", parser.testNextToken());
|
||||
// an unquoted single backslash is not an escape char
|
||||
|
@ -222,7 +161,7 @@ public class CSVParserTest extends TestCase {
|
|||
String code =
|
||||
"a,\"foo\",b\na, \" foo\",b\na,\"foo \" ,b\na, \" foo \" ,b";
|
||||
TestCSVParser parser = new TestCSVParser(new StringReader(code));
|
||||
parser.setCSVStrategy();
|
||||
parser.setStrategy(CSVStrategy.DEFAULT_STRATEGY);
|
||||
System.out.println("---------\n" + code + "\n-------------");
|
||||
assertEquals(CSVParser.TT_TOKEN + ";a;", parser.testNextToken());
|
||||
assertEquals(CSVParser.TT_TOKEN + ";foo;", parser.testNextToken());
|
||||
|
@ -246,7 +185,7 @@ public class CSVParserTest extends TestCase {
|
|||
+ ",\"\\,\""
|
||||
+ ",\"\"\"\"";
|
||||
TestCSVParser parser = new TestCSVParser(new StringReader(code));
|
||||
parser.setCSVStrategy();
|
||||
parser.setStrategy(CSVStrategy.DEFAULT_STRATEGY);
|
||||
System.out.println("---------\n" + code + "\n-------------");
|
||||
assertEquals(CSVParser.TT_TOKEN + ";a;", parser.testNextToken());
|
||||
assertEquals(CSVParser.TT_TOKEN + ";foo\n;", parser.testNextToken());
|
||||
|
@ -269,9 +208,7 @@ public class CSVParserTest extends TestCase {
|
|||
*/
|
||||
String code = "a;'b and \\' more\n'\n!comment;;;;\n;;";
|
||||
TestCSVParser parser = new TestCSVParser(new StringReader(code));
|
||||
parser.setDelimiter(';');
|
||||
parser.setEncapsulator('\'');
|
||||
parser.setCommentStart('!');
|
||||
parser.setStrategy( new CSVStrategy(';', '\'', '!') );
|
||||
System.out.println("---------\n" + code + "\n-------------");
|
||||
assertEquals(CSVParser.TT_TOKEN + ";a;", parser.testNextToken());
|
||||
assertEquals(
|
||||
|
@ -344,7 +281,7 @@ public class CSVParserTest extends TestCase {
|
|||
{"\"hello\"", " \"world\"", "abc\ndef", ""}
|
||||
};
|
||||
CSVParser parser = new CSVParser(new StringReader(code));
|
||||
parser.setExcelStrategy();
|
||||
parser.setStrategy(CSVStrategy.EXCEL_STRATEGY);
|
||||
System.out.println("---------\n" + code + "\n-------------");
|
||||
String[][] tmp = parser.getAllValues();
|
||||
assertEquals(res.length, tmp.length);
|
||||
|
@ -364,7 +301,7 @@ public class CSVParserTest extends TestCase {
|
|||
{"world", ""}
|
||||
};
|
||||
CSVParser parser = new CSVParser(new StringReader(code));
|
||||
parser.setExcelStrategy();
|
||||
parser.setStrategy(CSVStrategy.EXCEL_STRATEGY);
|
||||
System.out.println("---------\n" + code + "\n-------------");
|
||||
String[][] tmp = parser.getAllValues();
|
||||
assertEquals(res.length, tmp.length);
|
||||
|
@ -397,7 +334,7 @@ public class CSVParserTest extends TestCase {
|
|||
for (int codeIndex = 0; codeIndex < codes.length; codeIndex++) {
|
||||
code = codes[codeIndex];
|
||||
CSVParser parser = new CSVParser(new StringReader(code));
|
||||
parser.setExcelStrategy();
|
||||
parser.setStrategy(CSVStrategy.EXCEL_STRATEGY);
|
||||
System.out.println("---------\n" + code + "\n-------------");
|
||||
String[][] tmp = parser.getAllValues();
|
||||
assertEquals(res.length, tmp.length);
|
||||
|
@ -430,7 +367,7 @@ public class CSVParserTest extends TestCase {
|
|||
for (int codeIndex = 0; codeIndex < codes.length; codeIndex++) {
|
||||
code = codes[codeIndex];
|
||||
CSVParser parser = new CSVParser(new StringReader(code));
|
||||
parser.setCSVStrategy();
|
||||
parser.setStrategy(CSVStrategy.DEFAULT_STRATEGY);
|
||||
System.out.println("---------\n" + code + "\n-------------");
|
||||
String[][] tmp = parser.getAllValues();
|
||||
assertEquals(res.length, tmp.length);
|
||||
|
@ -460,7 +397,7 @@ public class CSVParserTest extends TestCase {
|
|||
for (int codeIndex = 0; codeIndex < codes.length; codeIndex++) {
|
||||
code = codes[codeIndex];
|
||||
CSVParser parser = new CSVParser(new StringReader(code));
|
||||
parser.setExcelStrategy();
|
||||
parser.setStrategy(CSVStrategy.EXCEL_STRATEGY);
|
||||
System.out.println("---------\n" + code + "\n-------------");
|
||||
String[][] tmp = parser.getAllValues();
|
||||
assertEquals(res.length, tmp.length);
|
||||
|
@ -488,7 +425,7 @@ public class CSVParserTest extends TestCase {
|
|||
for (int codeIndex = 0; codeIndex < codes.length; codeIndex++) {
|
||||
code = codes[codeIndex];
|
||||
CSVParser parser = new CSVParser(new StringReader(code));
|
||||
parser.setCSVStrategy();
|
||||
parser.setStrategy(CSVStrategy.DEFAULT_STRATEGY);
|
||||
System.out.println("---------\n" + code + "\n-------------");
|
||||
String[][] tmp = parser.getAllValues();
|
||||
assertEquals(res.length, tmp.length);
|
||||
|
@ -663,7 +600,7 @@ public class CSVParserTest extends TestCase {
|
|||
String code = "abc,\\u0070\\u0075\\u0062\\u006C\\u0069\\u0063";
|
||||
CSVParser parser = new CSVParser(new StringReader(code));
|
||||
System.out.println("---------\n" + code + "\n-------------");
|
||||
parser.setUnicodeEscapeInterpretation(true);
|
||||
parser.getStrategy().setUnicodeEscapeInterpretation(true);
|
||||
String[] data = parser.getLine();
|
||||
assertEquals(2, data.length);
|
||||
assertEquals("abc", data[0]);
|
||||
|
|
|
@ -0,0 +1,117 @@
|
|||
/*
|
||||
* Copyright 2005 The Apache Software Foundation.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.commons.csv;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.Reader;
|
||||
import java.io.StringReader;
|
||||
import java.util.Arrays;
|
||||
|
||||
import junit.framework.Test;
|
||||
import junit.framework.TestCase;
|
||||
import junit.framework.TestSuite;
|
||||
|
||||
/**
|
||||
* CSVStrategyTest
|
||||
*
|
||||
* The test are organized in three different sections:
|
||||
* The 'setter/getter' section, the lexer section and finally the strategy
|
||||
* section. In case a test fails, you should follow a top-down approach for
|
||||
* fixing a potential bug (its likely that the strategy itself fails if the lexer
|
||||
* has problems...).
|
||||
*/
|
||||
public class CSVStrategyTest extends TestCase {
|
||||
|
||||
/**
|
||||
* Constructor for JUnit.
|
||||
* @param name Name to be used in JUnit Test Environment
|
||||
*/
|
||||
public CSVStrategyTest(String name) {
|
||||
super(name);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a Test suite for JUnit.
|
||||
* @return Test suite for JUnit
|
||||
*/
|
||||
public static Test suite() {
|
||||
return new TestSuite(CSVStrategyTest.class);
|
||||
}
|
||||
|
||||
|
||||
// ======================================================
|
||||
// getters / setters
|
||||
// ======================================================
|
||||
public void testGetSetCommentStart() {
|
||||
CSVParser parser = new CSVParser(new StringReader("hello world"));
|
||||
CSVStrategy strategy = parser.getStrategy();
|
||||
strategy.setCommentStart('#');
|
||||
assertEquals(strategy.getCommentStart(), '#');
|
||||
strategy.setCommentStart('!');
|
||||
assertEquals(strategy.getCommentStart(), '!');
|
||||
}
|
||||
|
||||
public void testGetSetEncapsulator() {
|
||||
CSVParser parser = new CSVParser(new StringReader("hello world"));
|
||||
CSVStrategy strategy = parser.getStrategy();
|
||||
strategy.setEncapsulator('"');
|
||||
assertEquals(strategy.getEncapsulator(), '"');
|
||||
strategy.setEncapsulator('\'');
|
||||
assertEquals(strategy.getEncapsulator(), '\'');
|
||||
}
|
||||
|
||||
public void testGetSetDelimiter() {
|
||||
CSVParser parser = new CSVParser(new StringReader("hello world"));
|
||||
CSVStrategy strategy = parser.getStrategy();
|
||||
strategy.setDelimiter(';');
|
||||
assertEquals(strategy.getDelimiter(), ';');
|
||||
strategy.setDelimiter(',');
|
||||
assertEquals(strategy.getDelimiter(), ',');
|
||||
strategy.setDelimiter('\t');
|
||||
assertEquals(strategy.getDelimiter(), '\t');
|
||||
}
|
||||
|
||||
public void testSetCSVStrategy() {
|
||||
CSVParser parser = new CSVParser(new StringReader("hello world"));
|
||||
CSVStrategy strategy = parser.getStrategy();
|
||||
// default settings
|
||||
assertEquals(strategy.getDelimiter(), ',');
|
||||
assertEquals(strategy.getEncapsulator(), '"');
|
||||
assertEquals(strategy.getCommentStart(), '\0');
|
||||
assertEquals(true, strategy.getIgnoreLeadingWhitespaces());
|
||||
assertEquals(false, strategy.getUnicodeEscapeInterpretation());
|
||||
assertEquals(true, strategy.getIgnoreEmptyLines());
|
||||
// explicit csv settings
|
||||
parser.setStrategy(CSVStrategy.DEFAULT_STRATEGY);
|
||||
assertEquals(strategy.getDelimiter(), ',');
|
||||
assertEquals(strategy.getEncapsulator(), '"');
|
||||
assertEquals(strategy.getCommentStart(), '\0');
|
||||
assertEquals(true, strategy.getIgnoreLeadingWhitespaces());
|
||||
assertEquals(false, strategy.getUnicodeEscapeInterpretation());
|
||||
assertEquals(true, strategy.getIgnoreEmptyLines());
|
||||
}
|
||||
|
||||
public void testSetExcelStrategy() {
|
||||
CSVStrategy strategy = CSVStrategy.EXCEL_STRATEGY;
|
||||
assertEquals(strategy.getDelimiter(), ';');
|
||||
assertEquals(strategy.getEncapsulator(), '"');
|
||||
assertEquals(strategy.getCommentStart(), '\0');
|
||||
assertEquals(false, strategy.getIgnoreLeadingWhitespaces());
|
||||
assertEquals(false, strategy.getUnicodeEscapeInterpretation());
|
||||
assertEquals(false, strategy.getIgnoreEmptyLines());
|
||||
}
|
||||
|
||||
}
|
Loading…
Reference in New Issue