mirror of
https://github.com/honeymoose/OpenSearch.git
synced 2025-02-08 22:14:59 +00:00
This change fixes problem when using space or tab as a separator in CSV processor - we check if current character is separator before we check if it is whitespace. This also improves tests to always check all combinations of separators and quotes. Closes #67013
This commit is contained in:
parent
6175e18e92
commit
fdd2a9c235
@ -127,8 +127,6 @@ final class CsvParser {
|
|||||||
char c = currentChar();
|
char c = currentChar();
|
||||||
if (c == LF || c == CR || c == quote) {
|
if (c == LF || c == CR || c == quote) {
|
||||||
throw new IllegalArgumentException("Illegal character inside unquoted field at " + currentIndex);
|
throw new IllegalArgumentException("Illegal character inside unquoted field at " + currentIndex);
|
||||||
} else if (trim && isWhitespace(c)) {
|
|
||||||
spaceCount++;
|
|
||||||
} else if (c == separator) {
|
} else if (c == separator) {
|
||||||
state = State.START;
|
state = State.START;
|
||||||
if (setField(currentIndex - spaceCount)) {
|
if (setField(currentIndex - spaceCount)) {
|
||||||
@ -136,6 +134,8 @@ final class CsvParser {
|
|||||||
}
|
}
|
||||||
startIndex = currentIndex + 1;
|
startIndex = currentIndex + 1;
|
||||||
return false;
|
return false;
|
||||||
|
} else if (trim && isWhitespace(c)) {
|
||||||
|
spaceCount++;
|
||||||
} else {
|
} else {
|
||||||
spaceCount = 0;
|
spaceCount = 0;
|
||||||
}
|
}
|
||||||
@ -163,20 +163,20 @@ final class CsvParser {
|
|||||||
boolean shouldSetField = true;
|
boolean shouldSetField = true;
|
||||||
for (; currentIndex < length; currentIndex++) {
|
for (; currentIndex < length; currentIndex++) {
|
||||||
c = currentChar();
|
c = currentChar();
|
||||||
if (isWhitespace(c)) {
|
if (c == separator) {
|
||||||
if (shouldSetField) {
|
|
||||||
if (setField(currentIndex - 1)) {
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
shouldSetField = false;
|
|
||||||
}
|
|
||||||
} else if (c == separator) {
|
|
||||||
if (shouldSetField && setField(currentIndex - 1)) {
|
if (shouldSetField && setField(currentIndex - 1)) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
startIndex = currentIndex + 1;
|
startIndex = currentIndex + 1;
|
||||||
state = State.START;
|
state = State.START;
|
||||||
return false;
|
return false;
|
||||||
|
} else if (isWhitespace(c)) {
|
||||||
|
if (shouldSetField) {
|
||||||
|
if (setField(currentIndex - 1)) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
shouldSetField = false;
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
throw new IllegalArgumentException("character '" + c + "' after quoted field at " + currentIndex);
|
throw new IllegalArgumentException("character '" + c + "' after quoted field at " + currentIndex);
|
||||||
}
|
}
|
||||||
|
@ -24,33 +24,36 @@ import com.carrotsearch.randomizedtesting.annotations.ParametersFactory;
|
|||||||
import org.elasticsearch.ingest.IngestDocument;
|
import org.elasticsearch.ingest.IngestDocument;
|
||||||
import org.elasticsearch.ingest.RandomDocumentPicks;
|
import org.elasticsearch.ingest.RandomDocumentPicks;
|
||||||
import org.elasticsearch.test.ESTestCase;
|
import org.elasticsearch.test.ESTestCase;
|
||||||
import org.junit.Before;
|
|
||||||
|
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
import java.util.LinkedHashMap;
|
import java.util.LinkedHashMap;
|
||||||
|
import java.util.LinkedList;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.stream.Collectors;
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
public class CsvProcessorTests extends ESTestCase {
|
public class CsvProcessorTests extends ESTestCase {
|
||||||
|
|
||||||
private static final Character[] SEPARATORS = new Character[]{',', ';', '|', '.'};
|
private static final Character[] SEPARATORS = new Character[]{',', ';', '|', '.', '\t'};
|
||||||
|
private static final String[] QUOTES = new String[]{"'", "\"", ""};
|
||||||
private final String quote;
|
private final String quote;
|
||||||
private char separator;
|
private final char separator;
|
||||||
|
|
||||||
|
|
||||||
public CsvProcessorTests(@Name("quote") String quote) {
|
public CsvProcessorTests(@Name("quote") String quote, @Name("separator") char separator) {
|
||||||
this.quote = quote;
|
this.quote = quote;
|
||||||
|
this.separator = separator;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ParametersFactory
|
@ParametersFactory
|
||||||
public static Iterable<Object[]> parameters() {
|
public static Iterable<Object[]> parameters() {
|
||||||
return Arrays.asList(new Object[]{"'"}, new Object[]{"\""}, new Object[]{""});
|
LinkedList<Object[]> list = new LinkedList<>();
|
||||||
|
for (Character separator : SEPARATORS) {
|
||||||
|
for (String quote : QUOTES) {
|
||||||
|
list.add(new Object[]{quote, separator});
|
||||||
}
|
}
|
||||||
|
}
|
||||||
@Before
|
return list;
|
||||||
public void setup() {
|
|
||||||
separator = randomFrom(SEPARATORS);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testExactNumberOfFields() {
|
public void testExactNumberOfFields() {
|
||||||
|
Loading…
x
Reference in New Issue
Block a user