diff --git a/docs/reference/sql/endpoints/rest.asciidoc b/docs/reference/sql/endpoints/rest.asciidoc index 51e5aaa9fbf..779d95ea9c5 100644 --- a/docs/reference/sql/endpoints/rest.asciidoc +++ b/docs/reference/sql/endpoints/rest.asciidoc @@ -15,7 +15,7 @@ === Overview The SQL REST API accepts SQL in a JSON document, executes it, -and returns the results. +and returns the results. For example: [source,console] @@ -106,6 +106,10 @@ s|Description |=== +The `CSV` format accepts a formatting URL query attribute, `delimiter`, which indicates which character should be used to separate the CSV +values. It defaults to comma (`,`) and cannot take any of the following values: double quote (`"`), carriage-return (`\r`) and new-line (`\n`). +The tab (`\t`) cannot be used either; use the `tsv` format instead. + Here are some examples for the human readable formats: ==== CSV @@ -120,7 +124,7 @@ POST /_sql?format=csv -------------------------------------------------- // TEST[setup:library] -Which returns: +which returns: [source,text] -------------------------------------------------- @@ -133,6 +137,31 @@ James S.A. Corey,Leviathan Wakes,561,2011-06-02T00:00:00.000Z -------------------------------------------------- // TESTRESPONSE[non_json] +or: + +[source,console] +-------------------------------------------------- +POST /_sql?format=csv&delimiter=%3b +{ + "query": "SELECT * FROM library ORDER BY page_count DESC", + "fetch_size": 5 +} +-------------------------------------------------- +// TEST[setup:library] + +which returns: + +[source,text] +-------------------------------------------------- +author;name;page_count;release_date +Peter F. Hamilton;Pandora's Star;768;2004-03-02T00:00:00.000Z +Vernor Vinge;A Fire Upon the Deep;613;1992-06-01T00:00:00.000Z +Frank Herbert;Dune;604;1965-06-01T00:00:00.000Z +Alastair Reynolds;Revelation Space;585;2000-03-15T00:00:00.000Z +James S.A. 
Corey;Leviathan Wakes;561;2011-06-02T00:00:00.000Z +-------------------------------------------------- +// TESTRESPONSE[non_json] + ==== JSON [source,console] @@ -210,7 +239,7 @@ Which returns: [source,text] -------------------------------------------------- - author | name | page_count | release_date + author | name | page_count | release_date -----------------+--------------------+---------------+------------------------ Peter F. Hamilton|Pandora's Star |768 |2004-03-02T00:00:00.000Z Vernor Vinge |A Fire Upon the Deep|613 |1992-06-01T00:00:00.000Z @@ -275,8 +304,8 @@ cursor: "sDXF1ZXJ5QW5kRmV0Y2gBAAAAAAAAAAEWWWdrRlVfSS1TbDYtcW9lc1FJNmlYdw==:BAFmB === Paginating through a large response Using the example from the <>, one can -continue to the next page by sending back the cursor field. In case of text -format, the cursor is returned as `Cursor` http header. +continue to the next page by sending back the cursor field. In the case of CSV, TSV and TXT +formats, the cursor is returned in the `Cursor` HTTP header. 
[source,console] -------------------------------------------------- diff --git a/x-pack/plugin/sql/sql-proto/src/main/java/org/elasticsearch/xpack/sql/proto/Protocol.java b/x-pack/plugin/sql/sql-proto/src/main/java/org/elasticsearch/xpack/sql/proto/Protocol.java index 27320d18c79..fea9a8752c6 100644 --- a/x-pack/plugin/sql/sql-proto/src/main/java/org/elasticsearch/xpack/sql/proto/Protocol.java +++ b/x-pack/plugin/sql/sql-proto/src/main/java/org/elasticsearch/xpack/sql/proto/Protocol.java @@ -50,15 +50,21 @@ public final class Protocol { public static final TimeValue PAGE_TIMEOUT = TimeValue.timeValueSeconds(45); public static final boolean FIELD_MULTI_VALUE_LENIENCY = false; public static final boolean INDEX_INCLUDE_FROZEN = false; - + /* - * Using the Boolean object here so that SqlTranslateRequest to set this to null (since it doesn't need a "columnar" or + * Using the Boolean object here so that SqlTranslateRequest can set this to null (since it doesn't need a "columnar" or * binary parameter). 
* See {@code SqlTranslateRequest.toXContent} */ public static final Boolean COLUMNAR = Boolean.FALSE; public static final Boolean BINARY_COMMUNICATION = null; + /* + * URL parameters + */ + public static final String URL_PARAM_FORMAT = "format"; + public static final String URL_PARAM_DELIMITER = "delimiter"; + /** * SQL-related endpoints */ diff --git a/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/plugin/RestSqlQueryAction.java b/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/plugin/RestSqlQueryAction.java index 5e5c333b1e1..e10686dec46 100644 --- a/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/plugin/RestSqlQueryAction.java +++ b/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/plugin/RestSqlQueryAction.java @@ -24,16 +24,22 @@ import org.elasticsearch.xpack.sql.proto.Protocol; import java.io.IOException; import java.nio.charset.StandardCharsets; +import java.util.Collections; import java.util.List; +import java.util.Set; import static java.util.Arrays.asList; import static java.util.Collections.emptyList; import static java.util.Collections.unmodifiableList; import static org.elasticsearch.rest.RestRequest.Method.GET; import static org.elasticsearch.rest.RestRequest.Method.POST; +import static org.elasticsearch.xpack.sql.proto.Protocol.URL_PARAM_DELIMITER; +import static org.elasticsearch.xpack.sql.proto.Protocol.URL_PARAM_FORMAT; public class RestSqlQueryAction extends BaseRestHandler { + TextFormat textFormat; + @Override public List routes() { return emptyList(); @@ -77,7 +83,7 @@ public class RestSqlQueryAction extends BaseRestHandler { // enforce CBOR response for drivers and CLI (unless instructed differently through the config param) accept = XContentType.CBOR.name(); } else { - accept = request.param("format"); + accept = request.param(URL_PARAM_FORMAT); } if (accept == null) { accept = request.header("Accept"); @@ -99,7 +105,7 @@ public class RestSqlQueryAction extends BaseRestHandler { * which we turn 
into a 400 error. */ XContentType xContentType = accept == null ? XContentType.JSON : XContentType.fromMediaTypeOrFormat(accept); - TextFormat textFormat = xContentType == null ? TextFormat.fromMediaTypeOrFormat(accept) : null; + textFormat = xContentType == null ? TextFormat.fromMediaTypeOrFormat(accept) : null; if (xContentType == null && sqlRequest.columnar()) { throw new IllegalArgumentException("Invalid use of [columnar] argument: cannot be used in combination with " @@ -136,6 +142,11 @@ public class RestSqlQueryAction extends BaseRestHandler { }); } + @Override + protected Set responseParams() { + return textFormat == TextFormat.CSV ? Collections.singleton(URL_PARAM_DELIMITER) : Collections.emptySet(); + } + @Override public String getName() { return "sql_query"; diff --git a/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/plugin/TextFormat.java b/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/plugin/TextFormat.java index dd837e65315..d230c318b2b 100644 --- a/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/plugin/TextFormat.java +++ b/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/plugin/TextFormat.java @@ -17,6 +17,9 @@ import org.elasticsearch.xpack.sql.session.Cursor; import org.elasticsearch.xpack.sql.session.Cursors; import org.elasticsearch.xpack.sql.util.DateUtils; +import java.io.UnsupportedEncodingException; +import java.net.URLDecoder; +import java.nio.charset.StandardCharsets; import java.time.ZoneId; import java.time.ZonedDateTime; import java.util.List; @@ -25,6 +28,7 @@ import java.util.Objects; import java.util.function.Function; import static org.elasticsearch.xpack.sql.action.BasicFormatter.FormatOption.TEXT; +import static org.elasticsearch.xpack.sql.proto.Protocol.URL_PARAM_DELIMITER; /** * Templating class for displaying SQL responses in text formats. 
@@ -80,16 +84,16 @@ enum TextFormat { @Override String shortName() { - return "txt"; + return FORMAT_TEXT; } @Override String contentType() { - return "text/plain"; + return CONTENT_TYPE_TXT; } @Override - protected String delimiter() { + protected Character delimiter() { throw new UnsupportedOperationException(); } @@ -109,40 +113,68 @@ enum TextFormat { * */ CSV() { - @Override - protected String delimiter() { - return ","; + protected Character delimiter() { + return ','; } @Override protected String eol() { - //LFCR + //CRLF return "\r\n"; } @Override String shortName() { - return "csv"; + return FORMAT_CSV; } @Override String contentType() { - return "text/csv"; + return CONTENT_TYPE_CSV; } @Override String contentType(RestRequest request) { - return contentType() + "; charset=utf-8; header=" + (hasHeader(request) ? "present" : "absent"); + return contentType() + "; charset=utf-8; " + + URL_PARAM_HEADER + "=" + (hasHeader(request) ? PARAM_HEADER_PRESENT : PARAM_HEADER_ABSENT); } @Override - String maybeEscape(String value) { + protected Character delimiter(RestRequest request) { + String delimiterParam = request.param(URL_PARAM_DELIMITER); + if (delimiterParam == null) { + return delimiter(); + } + try { + delimiterParam = URLDecoder.decode(delimiterParam, StandardCharsets.UTF_8.toString()); + } catch (UnsupportedEncodingException uee) { + throw new IllegalArgumentException("delimiter [" + delimiterParam + "] cannot be decoded: " + uee.getMessage(), uee); + } + if (delimiterParam.length() != 1) { + throw new IllegalArgumentException("invalid " + + (delimiterParam.length() > 0 ? 
"multi-character" : "empty") + " delimiter [" + delimiterParam + "]"); + } + Character delimiter = delimiterParam.charAt(0); + switch (delimiter) { + case '"': + case '\n': + case '\r': + throw new IllegalArgumentException("illegal reserved character specified as delimiter [" + delimiter + "]"); + case '\t': + throw new IllegalArgumentException("illegal delimiter [TAB] specified as delimiter for the [csv] format; " + + "choose the [tsv] format instead"); + } + return delimiter; + } + + @Override + String maybeEscape(String value, Character delimiter) { boolean needsEscaping = false; for (int i = 0; i < value.length(); i++) { char c = value.charAt(i); - if (c == '"' || c == ',' || c == '\n' || c == '\r') { + if (c == '"' || c == '\n' || c == '\r' || c == delimiter) { needsEscaping = true; break; } @@ -162,20 +194,21 @@ enum TextFormat { sb.append('"'); value = sb.toString(); } + return value; } @Override boolean hasHeader(RestRequest request) { - String header = request.param("header"); + String header = request.param(URL_PARAM_HEADER); if (header == null) { List values = request.getAllHeaderValues("Accept"); if (values != null) { - // header is a parameter specified by ; so try breaking it down + // header values are separated by `;` so try breaking it down for (String value : values) { String[] params = Strings.tokenizeToStringArray(value, ";"); for (String param : params) { - if (param.toLowerCase(Locale.ROOT).equals("header=absent")) { + if (param.toLowerCase(Locale.ROOT).equals(URL_PARAM_HEADER + "=" + PARAM_HEADER_ABSENT)) { return false; } } @@ -183,31 +216,31 @@ enum TextFormat { } return true; } else { - return !header.toLowerCase(Locale.ROOT).equals("absent"); + return !header.toLowerCase(Locale.ROOT).equals(PARAM_HEADER_ABSENT); } } }, TSV() { @Override - protected String delimiter() { - return "\t"; + protected Character delimiter() { + return '\t'; } @Override protected String eol() { - // only CR + // only LF return "\n"; } @Override String shortName() 
{ - return "tsv"; + return FORMAT_TSV; } @Override String contentType() { - return "text/tab-separated-values"; + return CONTENT_TYPE_TSV; } @Override @@ -216,7 +249,7 @@ enum TextFormat { } @Override - String maybeEscape(String value) { + String maybeEscape(String value, Character __) { StringBuilder sb = new StringBuilder(); for (int i = 0; i < value.length(); i++) { @@ -237,17 +270,27 @@ enum TextFormat { } }; + private static final String FORMAT_TEXT = "txt"; + private static final String FORMAT_CSV = "csv"; + private static final String FORMAT_TSV = "tsv"; + private static final String CONTENT_TYPE_TXT = "text/plain"; + private static final String CONTENT_TYPE_CSV = "text/csv"; + private static final String CONTENT_TYPE_TSV = "text/tab-separated-values"; + private static final String URL_PARAM_HEADER = "header"; + private static final String PARAM_HEADER_ABSENT = "absent"; + private static final String PARAM_HEADER_PRESENT = "present"; String format(RestRequest request, SqlQueryResponse response) { StringBuilder sb = new StringBuilder(); // if the header is requested (and the column info is present - namely it's the first page) return the info if (hasHeader(request) && response.columns() != null) { - row(sb, response.columns(), ColumnInfo::name); + row(sb, response.columns(), ColumnInfo::name, delimiter(request)); } for (List row : response.rows()) { - row(sb, row, f -> f instanceof ZonedDateTime ? DateUtils.toString((ZonedDateTime) f) : Objects.toString(f, StringUtils.EMPTY)); + row(sb, row, f -> f instanceof ZonedDateTime ? DateUtils.toString((ZonedDateTime) f) : Objects.toString(f, StringUtils.EMPTY), + delimiter(request)); } return sb.toString(); @@ -292,11 +335,11 @@ enum TextFormat { } // utility method for consuming a row. 
- void row(StringBuilder sb, List row, Function toString) { + void row(StringBuilder sb, List row, Function toString, Character delimiter) { for (int i = 0; i < row.size(); i++) { - sb.append(maybeEscape(toString.apply(row.get(i)))); + sb.append(maybeEscape(toString.apply(row.get(i)), delimiter)); if (i < row.size() - 1) { - sb.append(delimiter()); + sb.append(delimiter); } } sb.append(eol()); @@ -305,7 +348,11 @@ enum TextFormat { /** * Delimiter between fields */ - protected abstract String delimiter(); + protected abstract Character delimiter(); + + protected Character delimiter(RestRequest request) { + return delimiter(); + } /** * String indicating end-of-line or row. @@ -315,7 +362,7 @@ enum TextFormat { /** * Method used for escaping (if needed) a given value. */ - String maybeEscape(String value) { + String maybeEscape(String value, Character delimiter) { return value; } } diff --git a/x-pack/plugin/sql/src/test/java/org/elasticsearch/xpack/sql/plugin/TextFormatTests.java b/x-pack/plugin/sql/src/test/java/org/elasticsearch/xpack/sql/plugin/TextFormatTests.java index a709133b601..67b4d17a629 100644 --- a/x-pack/plugin/sql/src/test/java/org/elasticsearch/xpack/sql/plugin/TextFormatTests.java +++ b/x-pack/plugin/sql/src/test/java/org/elasticsearch/xpack/sql/plugin/TextFormatTests.java @@ -15,7 +15,9 @@ import org.elasticsearch.xpack.sql.proto.ColumnInfo; import org.elasticsearch.xpack.sql.proto.Mode; import java.util.ArrayList; +import java.util.Arrays; import java.util.List; +import java.util.stream.Collectors; import static java.util.Arrays.asList; import static java.util.Collections.emptyList; @@ -52,7 +54,7 @@ public class TextFormatTests extends ESTestCase { } public void testCsvContentTypeWithoutHeader() { - assertEquals("text/csv; charset=utf-8; header=absent", CSV.contentType(reqNoHeader())); + assertEquals("text/csv; charset=utf-8; header=absent", CSV.contentType(reqWithParam("header", "absent"))); } public void testTsvContentType() { @@ -60,19 +62,20 
@@ public class TextFormatTests extends ESTestCase { } public void testCsvEscaping() { - assertEquals("string", CSV.maybeEscape("string")); - assertEquals("", CSV.maybeEscape("")); - assertEquals("\"\"\"\"", CSV.maybeEscape("\"")); - assertEquals("\"\"\",\"\"\"", CSV.maybeEscape("\",\"")); - assertEquals("\"\"\"quo\"\"ted\"\"\"", CSV.maybeEscape("\"quo\"ted\"")); + assertEquals("string", CSV.maybeEscape("string", CSV.delimiter())); + assertEquals("", CSV.maybeEscape("", CSV.delimiter())); + assertEquals("\"\"\"\"", CSV.maybeEscape("\"", CSV.delimiter())); + assertEquals("\"\"\",\"\"\"", CSV.maybeEscape("\",\"", CSV.delimiter())); + assertEquals("\"\"\"quo\"\"ted\"\"\"", CSV.maybeEscape("\"quo\"ted\"", CSV.delimiter())); + assertEquals("\"one;two\"", CSV.maybeEscape("one;two", ';')); } public void testTsvEscaping() { - assertEquals("string", TSV.maybeEscape("string")); - assertEquals("", TSV.maybeEscape("")); - assertEquals("\"", TSV.maybeEscape("\"")); - assertEquals("\\t", TSV.maybeEscape("\t")); - assertEquals("\\n\"\\t", TSV.maybeEscape("\n\"\t")); + assertEquals("string", TSV.maybeEscape("string", null)); + assertEquals("", TSV.maybeEscape("", null)); + assertEquals("\"", TSV.maybeEscape("\"", null)); + assertEquals("\\t", TSV.maybeEscape("\t", null)); + assertEquals("\\n\"\\t", TSV.maybeEscape("\n\"\t", null)); } public void testCsvFormatWithEmptyData() { @@ -90,7 +93,32 @@ public class TextFormatTests extends ESTestCase { assertEquals("string,number\r\n" + "Along The River Bank,708\r\n" + "Mind Train,280\r\n", - text); + text); + } + + public void testCsvFormatNoHeaderWithRegularData() { + String text = CSV.format(reqWithParam("header", "absent"), regularData()); + assertEquals("Along The River Bank,708\r\n" + + "Mind Train,280\r\n", + text); + } + + public void testCsvFormatWithCustomDelimiterRegularData() { + List forbidden = Arrays.asList('"', '\r', '\n', '\t'); + Character delim = randomValueOtherThanMany(forbidden::contains, () -> 
randomAlphaOfLength(1).charAt(0)); + String text = CSV.format(reqWithParam("delimiter", String.valueOf(delim)), regularData()); + List terms = Arrays.asList("string", "number", "Along The River Bank", "708", "Mind Train", "280"); + List expectedTerms = terms.stream() + .map(x -> x.contains(String.valueOf(delim)) ? '"' + x + '"' : x) + .collect(Collectors.toList()); + StringBuffer sb = new StringBuffer(); + do { + sb.append(expectedTerms.remove(0)); + sb.append(delim); + sb.append(expectedTerms.remove(0)); + sb.append("\r\n"); + } while (expectedTerms.size() > 0); + assertEquals(sb.toString(), text); } public void testTsvFormatWithRegularData() { @@ -106,6 +134,14 @@ public class TextFormatTests extends ESTestCase { assertEquals("first,\"\"\"special\"\"\"\r\n" + "normal,\"\"\"quo\"\"ted\"\",\n\"\r\n" + "commas,\"a,b,c,\n,d,e,\t\n\"\r\n" + , text); + } + + public void testCsvFormatWithCustomDelimiterEscapedData() { + String text = CSV.format(reqWithParam("delimiter", "\\"), escapedData()); + assertEquals("first\\\"\"\"special\"\"\"\r\n" + + "normal\\\"\"\"quo\"\"ted\"\",\n\"\r\n" + + "commas\\\"a,b,c,\n,d,e,\t\n\"\r\n" , text); } @@ -117,6 +153,25 @@ public class TextFormatTests extends ESTestCase { , text); } + public void testInvalidCsvDelims() { + List invalid = Arrays.asList("\"", "\r", "\n", "\t", "", "ab"); + + for (String c: invalid) { + Exception e = expectThrows(IllegalArgumentException.class, + () -> CSV.format(reqWithParam("delimiter", c), emptyData())); + String msg; + if (c.length() == 1) { + msg = c.equals("\t") + ? "illegal delimiter [TAB] specified as delimiter for the [csv] format; choose the [tsv] format instead" + : "illegal reserved character specified as delimiter [" + c + "]"; + } else { + msg = "invalid " + (c.length() > 0 ? 
"multi-character" : "empty") + " delimiter [" + c + "]"; + } + assertEquals(msg, e.getMessage()); + } + } + + private static SqlQueryResponse emptyData() { return new SqlQueryResponse(null, Mode.JDBC, false, singletonList(new ColumnInfo("index", "name", "keyword")), emptyList()); } @@ -153,7 +208,7 @@ public class TextFormatTests extends ESTestCase { return new FakeRestRequest(); } - private static RestRequest reqNoHeader() { - return new FakeRestRequest.Builder(NamedXContentRegistry.EMPTY).withParams(singletonMap("header", "absent")).build(); + private static RestRequest reqWithParam(String paramName, String paramVal) { + return new FakeRestRequest.Builder(NamedXContentRegistry.EMPTY).withParams(singletonMap(paramName, paramVal)).build(); } }