SQL: Add option to provide the delimiter for the CSV format (#59907) (#60420)

* SQL: Add option to provide the delimiter for the CSV format (#59907)

* Add option to provide the delimiter to the CSV fmt

This adds the option to provide the desired character as the separator
for the CSV format (the default remains comma).
A set of characters is excluded though - like CR, LF, `"` - to avoid
slipping onto the CSV-dialects slope. The tab is also forbidden; the
user needs to choose the "tsv" format explicitly.

Update the doc to make it clear that the textual CSV, TSV and TXT
formats pass the cursor back to the user through the Cursor HTTP header.

(cherry picked from commit 3a8b00cc7480f7ada57fcea3cbac957facac08fc)

* Java8 fixes

- replace Set#of();
- URLDecoder#decode() requires a string (vs a charset) as 2nd arg.
This commit is contained in:
Bogdan Pintea 2020-07-29 21:40:11 +02:00 committed by GitHub
parent 30610d962a
commit 8c22adc447
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 200 additions and 52 deletions

View File

@ -15,7 +15,7 @@
=== Overview
The SQL REST API accepts SQL in a JSON document, executes it,
and returns the results.
and returns the results.
For example:
[source,console]
@ -106,6 +106,10 @@ s|Description
|===
The `CSV` format accepts a formatting URL query attribute, `delimiter`, which indicates which character should be used to separate the CSV
values. It defaults to comma (`,`) and cannot take any of the following values: double quote (`"`), carriage-return (`\r`) and new-line (`\n`).
The tab (`\t`) cannot be used either; the `tsv` format needs to be used instead.
Here are some examples for the human readable formats:
==== CSV
@ -120,7 +124,7 @@ POST /_sql?format=csv
--------------------------------------------------
// TEST[setup:library]
Which returns:
which returns:
[source,text]
--------------------------------------------------
@ -133,6 +137,31 @@ James S.A. Corey,Leviathan Wakes,561,2011-06-02T00:00:00.000Z
--------------------------------------------------
// TESTRESPONSE[non_json]
or:
[source,console]
--------------------------------------------------
POST /_sql?format=csv&delimiter=%3b
{
"query": "SELECT * FROM library ORDER BY page_count DESC",
"fetch_size": 5
}
--------------------------------------------------
// TEST[setup:library]
which returns:
[source,text]
--------------------------------------------------
author;name;page_count;release_date
Peter F. Hamilton;Pandora's Star;768;2004-03-02T00:00:00.000Z
Vernor Vinge;A Fire Upon the Deep;613;1992-06-01T00:00:00.000Z
Frank Herbert;Dune;604;1965-06-01T00:00:00.000Z
Alastair Reynolds;Revelation Space;585;2000-03-15T00:00:00.000Z
James S.A. Corey;Leviathan Wakes;561;2011-06-02T00:00:00.000Z
--------------------------------------------------
// TESTRESPONSE[non_json]
==== JSON
[source,console]
@ -210,7 +239,7 @@ Which returns:
[source,text]
--------------------------------------------------
author | name | page_count | release_date
author | name | page_count | release_date
-----------------+--------------------+---------------+------------------------
Peter F. Hamilton|Pandora's Star |768 |2004-03-02T00:00:00.000Z
Vernor Vinge |A Fire Upon the Deep|613 |1992-06-01T00:00:00.000Z
@ -275,8 +304,8 @@ cursor: "sDXF1ZXJ5QW5kRmV0Y2gBAAAAAAAAAAEWWWdrRlVfSS1TbDYtcW9lc1FJNmlYdw==:BAFmB
=== Paginating through a large response
Using the example from the <<sql-rest-format,previous section>>, one can
continue to the next page by sending back the cursor field. In case of text
format, the cursor is returned as `Cursor` http header.
continue to the next page by sending back the cursor field. In the case of CSV, TSV and TXT
formats, the cursor is returned in the `Cursor` HTTP header.
[source,console]
--------------------------------------------------

View File

@ -50,15 +50,21 @@ public final class Protocol {
public static final TimeValue PAGE_TIMEOUT = TimeValue.timeValueSeconds(45);
public static final boolean FIELD_MULTI_VALUE_LENIENCY = false;
public static final boolean INDEX_INCLUDE_FROZEN = false;
/*
* Using the Boolean object here so that SqlTranslateRequest to set this to null (since it doesn't need a "columnar" or
* Using the Boolean object here so that SqlTranslateRequest to set this to null (since it doesn't need a "columnar" or
* binary parameter).
* See {@code SqlTranslateRequest.toXContent}
*/
public static final Boolean COLUMNAR = Boolean.FALSE;
public static final Boolean BINARY_COMMUNICATION = null;
/*
* URL parameters
*/
public static final String URL_PARAM_FORMAT = "format";
public static final String URL_PARAM_DELIMITER = "delimiter";
/**
* SQL-related endpoints
*/

View File

@ -24,16 +24,22 @@ import org.elasticsearch.xpack.sql.proto.Protocol;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.Collections;
import java.util.List;
import java.util.Set;
import static java.util.Arrays.asList;
import static java.util.Collections.emptyList;
import static java.util.Collections.unmodifiableList;
import static org.elasticsearch.rest.RestRequest.Method.GET;
import static org.elasticsearch.rest.RestRequest.Method.POST;
import static org.elasticsearch.xpack.sql.proto.Protocol.URL_PARAM_DELIMITER;
import static org.elasticsearch.xpack.sql.proto.Protocol.URL_PARAM_FORMAT;
public class RestSqlQueryAction extends BaseRestHandler {
TextFormat textFormat;
@Override
public List<Route> routes() {
return emptyList();
@ -77,7 +83,7 @@ public class RestSqlQueryAction extends BaseRestHandler {
// enforce CBOR response for drivers and CLI (unless instructed differently through the config param)
accept = XContentType.CBOR.name();
} else {
accept = request.param("format");
accept = request.param(URL_PARAM_FORMAT);
}
if (accept == null) {
accept = request.header("Accept");
@ -99,7 +105,7 @@ public class RestSqlQueryAction extends BaseRestHandler {
* which we turn into a 400 error.
*/
XContentType xContentType = accept == null ? XContentType.JSON : XContentType.fromMediaTypeOrFormat(accept);
TextFormat textFormat = xContentType == null ? TextFormat.fromMediaTypeOrFormat(accept) : null;
textFormat = xContentType == null ? TextFormat.fromMediaTypeOrFormat(accept) : null;
if (xContentType == null && sqlRequest.columnar()) {
throw new IllegalArgumentException("Invalid use of [columnar] argument: cannot be used in combination with "
@ -136,6 +142,11 @@ public class RestSqlQueryAction extends BaseRestHandler {
});
}
/**
 * Returns the {@code delimiter} URL parameter name when the text format selected for the
 * request is CSV, and an empty set for every other format.
 */
@Override
protected Set<String> responseParams() {
    // NOTE(review): textFormat is an instance field assigned while a request is being prepared; if one
    // handler instance serves concurrent requests this could observe another request's format - confirm.
    if (textFormat == TextFormat.CSV) {
        return Collections.singleton(URL_PARAM_DELIMITER);
    }
    return Collections.emptySet();
}
@Override
public String getName() {
return "sql_query";

View File

@ -17,6 +17,9 @@ import org.elasticsearch.xpack.sql.session.Cursor;
import org.elasticsearch.xpack.sql.session.Cursors;
import org.elasticsearch.xpack.sql.util.DateUtils;
import java.io.UnsupportedEncodingException;
import java.net.URLDecoder;
import java.nio.charset.StandardCharsets;
import java.time.ZoneId;
import java.time.ZonedDateTime;
import java.util.List;
@ -25,6 +28,7 @@ import java.util.Objects;
import java.util.function.Function;
import static org.elasticsearch.xpack.sql.action.BasicFormatter.FormatOption.TEXT;
import static org.elasticsearch.xpack.sql.proto.Protocol.URL_PARAM_DELIMITER;
/**
* Templating class for displaying SQL responses in text formats.
@ -80,16 +84,16 @@ enum TextFormat {
@Override
String shortName() {
return "txt";
return FORMAT_TEXT;
}
@Override
String contentType() {
return "text/plain";
return CONTENT_TYPE_TXT;
}
@Override
protected String delimiter() {
protected Character delimiter() {
throw new UnsupportedOperationException();
}
@ -109,40 +113,68 @@ enum TextFormat {
*
*/
CSV() {
@Override
protected String delimiter() {
return ",";
protected Character delimiter() {
return ',';
}
@Override
protected String eol() {
//LFCR
//CRLF
return "\r\n";
}
@Override
String shortName() {
return "csv";
return FORMAT_CSV;
}
@Override
String contentType() {
return "text/csv";
return CONTENT_TYPE_CSV;
}
@Override
String contentType(RestRequest request) {
return contentType() + "; charset=utf-8; header=" + (hasHeader(request) ? "present" : "absent");
return contentType() + "; charset=utf-8; " +
URL_PARAM_HEADER + "=" + (hasHeader(request) ? PARAM_HEADER_PRESENT : PARAM_HEADER_ABSENT);
}
@Override
String maybeEscape(String value) {
protected Character delimiter(RestRequest request) {
    // Resolves the CSV field delimiter from the "delimiter" URL parameter.
    // Returns the format's default delimiter when the parameter is absent; otherwise the single
    // character provided. Throws IllegalArgumentException when the value cannot be decoded, is
    // empty, spans more than one character, or is a reserved character (double quote, LF, CR, TAB).
    String delimiterParam = request.param(URL_PARAM_DELIMITER);
    if (delimiterParam == null) {
        return delimiter();
    }
    try {
        delimiterParam = URLDecoder.decode(delimiterParam, StandardCharsets.UTF_8.toString());
    } catch (UnsupportedEncodingException | IllegalArgumentException e) {
        // URLDecoder can also reject malformed percent-escapes (e.g. a lone '%') with an
        // IllegalArgumentException; report both failures uniformly, naming the offending value.
        throw new IllegalArgumentException("delimiter [" + delimiterParam + "] cannot be decoded: " + e.getMessage(), e);
    }
    if (delimiterParam.length() != 1) {
        throw new IllegalArgumentException("invalid " +
            (delimiterParam.length() > 0 ? "multi-character" : "empty") + " delimiter [" + delimiterParam + "]");
    }
    Character delimiter = delimiterParam.charAt(0);
    switch (delimiter) {
        // characters with a fixed meaning in the CSV output cannot double as the separator
        case '"':
        case '\n':
        case '\r':
            throw new IllegalArgumentException("illegal reserved character specified as delimiter [" + delimiter + "]");
        // tab-separated output has its own dedicated format
        case '\t':
            throw new IllegalArgumentException("illegal delimiter [TAB] specified as delimiter for the [csv] format; " +
                "choose the [tsv] format instead");
    }
    return delimiter;
}
@Override
String maybeEscape(String value, Character delimiter) {
boolean needsEscaping = false;
for (int i = 0; i < value.length(); i++) {
char c = value.charAt(i);
if (c == '"' || c == ',' || c == '\n' || c == '\r') {
if (c == '"' || c == '\n' || c == '\r' || c == delimiter) {
needsEscaping = true;
break;
}
@ -162,20 +194,21 @@ enum TextFormat {
sb.append('"');
value = sb.toString();
}
return value;
}
@Override
boolean hasHeader(RestRequest request) {
String header = request.param("header");
String header = request.param(URL_PARAM_HEADER);
if (header == null) {
List<String> values = request.getAllHeaderValues("Accept");
if (values != null) {
// header is a parameter specified by ; so try breaking it down
// header values are separated by `;` so try breaking it down
for (String value : values) {
String[] params = Strings.tokenizeToStringArray(value, ";");
for (String param : params) {
if (param.toLowerCase(Locale.ROOT).equals("header=absent")) {
if (param.toLowerCase(Locale.ROOT).equals(URL_PARAM_HEADER + "=" + PARAM_HEADER_ABSENT)) {
return false;
}
}
@ -183,31 +216,31 @@ enum TextFormat {
}
return true;
} else {
return !header.toLowerCase(Locale.ROOT).equals("absent");
return !header.toLowerCase(Locale.ROOT).equals(PARAM_HEADER_ABSENT);
}
}
},
TSV() {
@Override
protected String delimiter() {
return "\t";
protected Character delimiter() {
return '\t';
}
@Override
protected String eol() {
// only CR
// only LF
return "\n";
}
@Override
String shortName() {
return "tsv";
return FORMAT_TSV;
}
@Override
String contentType() {
return "text/tab-separated-values";
return CONTENT_TYPE_TSV;
}
@Override
@ -216,7 +249,7 @@ enum TextFormat {
}
@Override
String maybeEscape(String value) {
String maybeEscape(String value, Character __) {
StringBuilder sb = new StringBuilder();
for (int i = 0; i < value.length(); i++) {
@ -237,17 +270,27 @@ enum TextFormat {
}
};
private static final String FORMAT_TEXT = "txt";
private static final String FORMAT_CSV = "csv";
private static final String FORMAT_TSV = "tsv";
private static final String CONTENT_TYPE_TXT = "text/plain";
private static final String CONTENT_TYPE_CSV = "text/csv";
private static final String CONTENT_TYPE_TSV = "text/tab-separated-values";
private static final String URL_PARAM_HEADER = "header";
private static final String PARAM_HEADER_ABSENT = "absent";
private static final String PARAM_HEADER_PRESENT = "present";
String format(RestRequest request, SqlQueryResponse response) {
StringBuilder sb = new StringBuilder();
// if the header is requested (and the column info is present - namely it's the first page) return the info
if (hasHeader(request) && response.columns() != null) {
row(sb, response.columns(), ColumnInfo::name);
row(sb, response.columns(), ColumnInfo::name, delimiter(request));
}
for (List<Object> row : response.rows()) {
row(sb, row, f -> f instanceof ZonedDateTime ? DateUtils.toString((ZonedDateTime) f) : Objects.toString(f, StringUtils.EMPTY));
row(sb, row, f -> f instanceof ZonedDateTime ? DateUtils.toString((ZonedDateTime) f) : Objects.toString(f, StringUtils.EMPTY),
delimiter(request));
}
return sb.toString();
@ -292,11 +335,11 @@ enum TextFormat {
}
// utility method for consuming a row.
<F> void row(StringBuilder sb, List<F> row, Function<F, String> toString) {
<F> void row(StringBuilder sb, List<F> row, Function<F, String> toString, Character delimiter) {
for (int i = 0; i < row.size(); i++) {
sb.append(maybeEscape(toString.apply(row.get(i))));
sb.append(maybeEscape(toString.apply(row.get(i)), delimiter));
if (i < row.size() - 1) {
sb.append(delimiter());
sb.append(delimiter);
}
}
sb.append(eol());
@ -305,7 +348,11 @@ enum TextFormat {
/**
* Delimiter between fields
*/
protected abstract String delimiter();
protected abstract Character delimiter();
/**
 * Delimiter between fields for the given request.
 * This default implementation ignores the request and returns {@link #delimiter()}.
 *
 * @param request the REST request being answered (unused here)
 * @return the delimiter to place between the fields of a row
 */
protected Character delimiter(RestRequest request) {
return delimiter();
}
/**
* String indicating end-of-line or row.
@ -315,7 +362,7 @@ enum TextFormat {
/**
* Method used for escaping (if needed) a given value.
*/
String maybeEscape(String value) {
String maybeEscape(String value, Character delimiter) {
return value;
}
}

View File

@ -15,7 +15,9 @@ import org.elasticsearch.xpack.sql.proto.ColumnInfo;
import org.elasticsearch.xpack.sql.proto.Mode;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.stream.Collectors;
import static java.util.Arrays.asList;
import static java.util.Collections.emptyList;
@ -52,7 +54,7 @@ public class TextFormatTests extends ESTestCase {
}
public void testCsvContentTypeWithoutHeader() {
assertEquals("text/csv; charset=utf-8; header=absent", CSV.contentType(reqNoHeader()));
assertEquals("text/csv; charset=utf-8; header=absent", CSV.contentType(reqWithParam("header", "absent")));
}
public void testTsvContentType() {
@ -60,19 +62,20 @@ public class TextFormatTests extends ESTestCase {
}
public void testCsvEscaping() {
assertEquals("string", CSV.maybeEscape("string"));
assertEquals("", CSV.maybeEscape(""));
assertEquals("\"\"\"\"", CSV.maybeEscape("\""));
assertEquals("\"\"\",\"\"\"", CSV.maybeEscape("\",\""));
assertEquals("\"\"\"quo\"\"ted\"\"\"", CSV.maybeEscape("\"quo\"ted\""));
assertEquals("string", CSV.maybeEscape("string", CSV.delimiter()));
assertEquals("", CSV.maybeEscape("", CSV.delimiter()));
assertEquals("\"\"\"\"", CSV.maybeEscape("\"", CSV.delimiter()));
assertEquals("\"\"\",\"\"\"", CSV.maybeEscape("\",\"", CSV.delimiter()));
assertEquals("\"\"\"quo\"\"ted\"\"\"", CSV.maybeEscape("\"quo\"ted\"", CSV.delimiter()));
assertEquals("\"one;two\"", CSV.maybeEscape("one;two", ';'));
}
public void testTsvEscaping() {
assertEquals("string", TSV.maybeEscape("string"));
assertEquals("", TSV.maybeEscape(""));
assertEquals("\"", TSV.maybeEscape("\""));
assertEquals("\\t", TSV.maybeEscape("\t"));
assertEquals("\\n\"\\t", TSV.maybeEscape("\n\"\t"));
assertEquals("string", TSV.maybeEscape("string", null));
assertEquals("", TSV.maybeEscape("", null));
assertEquals("\"", TSV.maybeEscape("\"", null));
assertEquals("\\t", TSV.maybeEscape("\t", null));
assertEquals("\\n\"\\t", TSV.maybeEscape("\n\"\t", null));
}
public void testCsvFormatWithEmptyData() {
@ -90,7 +93,32 @@ public class TextFormatTests extends ESTestCase {
assertEquals("string,number\r\n" +
"Along The River Bank,708\r\n" +
"Mind Train,280\r\n",
text);
text);
}
// With header=absent only the data rows are expected, each terminated by CRLF.
public void testCsvFormatNoHeaderWithRegularData() {
    RestRequest headerlessRequest = reqWithParam("header", "absent");
    String expected = "Along The River Bank,708\r\n"
        + "Mind Train,280\r\n";
    assertEquals(expected, CSV.format(headerlessRequest, regularData()));
}
// Formats the regular data set with a random one-character delimiter and checks that the
// output uses it between fields, quoting any term that happens to contain it.
public void testCsvFormatWithCustomDelimiterRegularData() {
    List<Character> reserved = Arrays.asList('"', '\r', '\n', '\t');
    Character delim = randomValueOtherThanMany(reserved::contains, () -> randomAlphaOfLength(1).charAt(0));
    String delimAsString = String.valueOf(delim);
    String text = CSV.format(reqWithParam("delimiter", delimAsString), regularData());

    // header row followed by the data rows, flattened: two terms per row
    List<String> terms = Arrays.asList("string", "number", "Along The River Bank", "708", "Mind Train", "280");
    List<String> expectedTerms = terms.stream()
        .map(term -> term.contains(delimAsString) ? '"' + term + '"' : term)
        .collect(Collectors.toList());

    StringBuilder expected = new StringBuilder();
    for (int i = 0; i < expectedTerms.size(); i += 2) {
        expected.append(expectedTerms.get(i));
        expected.append(delim);
        expected.append(expectedTerms.get(i + 1));
        expected.append("\r\n");
    }
    assertEquals(expected.toString(), text);
}
public void testTsvFormatWithRegularData() {
@ -106,6 +134,14 @@ public class TextFormatTests extends ESTestCase {
assertEquals("first,\"\"\"special\"\"\"\r\n" +
"normal,\"\"\"quo\"\"ted\"\",\n\"\r\n" +
"commas,\"a,b,c,\n,d,e,\t\n\"\r\n"
, text);
}
// A backslash delimiter separates the fields; values holding quotes or line breaks are still
// quoted, while embedded commas no longer separate anything.
public void testCsvFormatWithCustomDelimiterEscapedData() {
    RestRequest backslashDelimited = reqWithParam("delimiter", "\\");
    String expected = "first\\\"\"\"special\"\"\"\r\n"
        + "normal\\\"\"\"quo\"\"ted\"\",\n\"\r\n"
        + "commas\\\"a,b,c,\n,d,e,\t\n\"\r\n";
    assertEquals(expected, CSV.format(backslashDelimited, escapedData()));
}
@ -117,6 +153,25 @@ public class TextFormatTests extends ESTestCase {
, text);
}
// Every rejected delimiter value must fail with an IllegalArgumentException carrying the
// message that matches the reason for the rejection.
public void testInvalidCsvDelims() {
    for (String candidate : Arrays.asList("\"", "\r", "\n", "\t", "", "ab")) {
        Exception e = expectThrows(IllegalArgumentException.class,
            () -> CSV.format(reqWithParam("delimiter", candidate), emptyData()));

        String expectedMessage;
        if (candidate.length() != 1) {
            // wrong length: either nothing at all or more than one character
            String kind = candidate.length() > 0 ? "multi-character" : "empty";
            expectedMessage = "invalid " + kind + " delimiter [" + candidate + "]";
        } else if (candidate.equals("\t")) {
            expectedMessage = "illegal delimiter [TAB] specified as delimiter for the [csv] format; choose the [tsv] format instead";
        } else {
            expectedMessage = "illegal reserved character specified as delimiter [" + candidate + "]";
        }
        assertEquals(expectedMessage, e.getMessage());
    }
}
private static SqlQueryResponse emptyData() {
return new SqlQueryResponse(null, Mode.JDBC, false, singletonList(new ColumnInfo("index", "name", "keyword")), emptyList());
}
@ -153,7 +208,7 @@ public class TextFormatTests extends ESTestCase {
return new FakeRestRequest();
}
private static RestRequest reqNoHeader() {
return new FakeRestRequest.Builder(NamedXContentRegistry.EMPTY).withParams(singletonMap("header", "absent")).build();
// Builds a fake REST request whose parameters consist of the single given name/value pair.
private static RestRequest reqWithParam(String paramName, String paramVal) {
    FakeRestRequest.Builder builder = new FakeRestRequest.Builder(NamedXContentRegistry.EMPTY);
    return builder.withParams(singletonMap(paramName, paramVal)).build();
}
}