NIFI-7410 Update JdbcCommon.java when javaSqlType is CLOB or NCLOB in convertToAvroStream method, use the DataTypeUtils.toString(clob/nClob,(String) null, StandardCharsets.UTF_8)) method to get the String

NIFI-7410 Update JdbcCommon.java: when javaSqlType is CLOB or NCLOB in the convertToAvroStream method, use the CharacterStream to read the value of the CLOB
NIFI-7410 Add a unit test that validates the CLOB value remains readable when it contains Chinese, Japanese, or Korean text.

Signed-off-by: Pierre Villard <pierre.villard.fr@gmail.com>

This closes #4243.
This commit is contained in:
zhangcheng 2020-04-30 16:55:14 +08:00 committed by Pierre Villard
parent a72c3d685c
commit e18b4f0c75
No known key found for this signature in database
GPG Key ID: F92A93B30C07C6D5
2 changed files with 143 additions and 29 deletions

View File

@ -58,10 +58,8 @@ import java.io.UnsupportedEncodingException;
import java.math.BigDecimal;
import java.math.BigInteger;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.sql.Blob;
import java.sql.Clob;
import java.sql.NClob;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.ResultSetMetaData;
@ -256,36 +254,18 @@ public class JdbcCommon {
final Schema fieldSchema = schema.getFields().get(i - 1).schema();
// Need to handle CLOB and BLOB before getObject() is called, due to ResultSet's maximum portability statement
if (javaSqlType == CLOB) {
if (javaSqlType == CLOB || javaSqlType == NCLOB) {
Clob clob = rs.getClob(i);
if (clob != null) {
long numChars = clob.length();
char[] buffer = new char[(int) numChars];
InputStream is = clob.getAsciiStream();
int index = 0;
int c = is.read();
while (c >= 0) {
buffer[index++] = (char) c;
c = is.read();
StringBuilder sb = new StringBuilder();
char[] buffer = new char[32 * 1024]; // 32K default buffer
try (Reader reader = clob.getCharacterStream()) {
int charsRead;
while ((charsRead = reader.read(buffer)) != -1) {
sb.append(buffer, 0, charsRead);
}
}
rec.put(i - 1, new String(buffer));
clob.free();
} else {
rec.put(i - 1, null);
}
continue;
}
if (javaSqlType == NCLOB) {
NClob nClob = rs.getNClob(i);
if (nClob != null) {
final Reader characterStream = nClob.getCharacterStream();
long numChars = (int) nClob.length();
final CharBuffer buffer = CharBuffer.allocate((int) numChars);
characterStream.read(buffer);
buffer.flip();
rec.put(i - 1, buffer.toString());
nClob.free();
rec.put(i - 1, sb.toString());
} else {
rec.put(i - 1, null);
}

View File

@ -0,0 +1,134 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.util.db;
import org.apache.avro.file.DataFileStream;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.io.DatumReader;
import org.junit.Assert;
import org.junit.BeforeClass;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.TemporaryFolder;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import static org.junit.Assert.assertNotNull;
/**
 * Checks that CLOB values containing Chinese, Japanese and Korean characters come back
 * unchanged after a round trip through {@code JdbcCommon.convertToAvroStream}.
 *
 * <p>Each test stores one row in an embedded Derby database created under a JUnit
 * {@link TemporaryFolder}, serializes the query result to Avro, reads the Avro bytes back
 * and compares the {@code SOMECLOB} field with the text that was inserted.
 */
public class TestJdbcClobReadable {

    @Rule
    public TemporaryFolder folder = new TemporaryFolder();

    /** Redirects Derby's error log into the build directory. */
    @BeforeClass
    public static void setup() {
        System.setProperty("derby.stream.error.file", "target/derby.log");
    }

    // DDL for the single table used by every test; only the someclob column is queried.
    String createTable = "create table users ("
            + " id int NOT NULL GENERATED ALWAYS AS IDENTITY, "
            + " email varchar(255) NOT NULL UNIQUE, "
            + " password varchar(255) DEFAULT NULL, "
            + " someclob CLOB default null, "
            + " PRIMARY KEY (id) ) ";

    String dropTable = "drop table users";

    @Test
    public void testClobWithChinese() throws SQLException, IOException, ClassNotFoundException {
        String chineseContent = "中国China";
        validateClob(chineseContent);
    }

    @Test
    public void testClobWithJapanese() throws SQLException, IOException, ClassNotFoundException {
        String japaneseContent = "にほんJapan";
        validateClob(japaneseContent);
    }

    @Test
    public void testClobWithKorean() throws SQLException, IOException, ClassNotFoundException {
        // Use genuine Hangul; the previous literal repeated the Japanese sample,
        // so Korean text was never exercised.
        String koreanContent = "한국Korea";
        validateClob(koreanContent);
    }

    // many tests use Derby as database, so ensure driver is available
    @Test
    public void testDriverLoad() throws ClassNotFoundException {
        final Class<?> clazz = Class.forName("org.apache.derby.jdbc.EmbeddedDriver");
        assertNotNull(clazz);
    }

    /**
     * Inserts {@code someClob} into the CLOB column, converts the query result to Avro and
     * asserts that the single deserialized record carries exactly the same text.
     *
     * @param someClob the text to store in and read back from the CLOB column
     * @throws SQLException if a database operation other than the initial drop fails
     * @throws ClassNotFoundException if the Derby embedded driver is not on the classpath
     * @throws IOException if Avro serialization or deserialization fails
     */
    private void validateClob(String someClob) throws SQLException, ClassNotFoundException, IOException {
        // Remove the directory created by the rule before handing its path to Derby with create=true.
        folder.delete();

        final byte[] serializedBytes;
        // try-with-resources releases the JDBC handles even when a statement or conversion fails.
        try (final Connection con = createConnection(folder.getRoot().getAbsolutePath());
             final Statement st = con.createStatement()) {
            try {
                st.executeUpdate(dropTable);
            } catch (final SQLException ignored) {
                // table may not exist, this is not serious problem.
            }

            st.executeUpdate(createTable);
            st.executeUpdate(String.format("insert into users (email, password, someClob) "
                    + " values ('robert.gates@cold.com', '******', '%s')", someClob));

            final ByteArrayOutputStream outStream = new ByteArrayOutputStream();
            try (final ResultSet resultSet = st.executeQuery("select U.SOMECLOB from users U")) {
                JdbcCommon.convertToAvroStream(resultSet, outStream, false);
            }
            serializedBytes = outStream.toByteArray();
        }

        assertNotNull(serializedBytes);
        System.out.println("Avro serialized result size in bytes: " + serializedBytes.length);

        // Deserialize bytes to records
        final InputStream inputStream = new ByteArrayInputStream(serializedBytes);
        final DatumReader<GenericRecord> datumReader = new GenericDatumReader<>();
        int recordCount = 0;
        try (final DataFileStream<GenericRecord> dataFileReader = new DataFileStream<>(inputStream, datumReader)) {
            GenericRecord record = null;
            while (dataFileReader.hasNext()) {
                record = dataFileReader.next(record);
                Assert.assertEquals("Unreadable code for this Clob value.", someClob, record.get("SOMECLOB").toString());
                System.out.println(record);
                recordCount++;
            }
        }
        // Guard against a vacuous pass: the comparison above only runs if a record was produced.
        Assert.assertEquals("Expected exactly one Avro record for the inserted row.", 1, recordCount);
    }

    /**
     * Loads the Derby embedded driver and opens (creating if necessary) a database at the given path.
     *
     * @param location file-system path of the Derby database directory
     * @return an open connection to that database
     * @throws ClassNotFoundException if the Derby embedded driver class cannot be loaded
     * @throws SQLException if the connection cannot be established
     */
    private Connection createConnection(String location) throws ClassNotFoundException, SQLException {
        Class.forName("org.apache.derby.jdbc.EmbeddedDriver");
        return DriverManager.getConnection("jdbc:derby:" + location + ";create=true");
    }
}