mirror of https://github.com/apache/nifi.git

NIFI-3335: Add initial.maxvalue support to GenerateTableFetch

Signed-off-by: Pierre Villard <pierre.villard.fr@gmail.com>

This closes #2039.

parent 759f81bc1b
commit dc4006f423
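In short: QueryDatabaseTable already let users seed a max-value column's starting point with a dynamic property named initial.maxvalue.<column>; this commit moves that support into the shared base class AbstractDatabaseFetchProcessor so GenerateTableFetch gets it too. The seed value is applied only when the processor's state does not yet hold a max value for that column. For example (taken from the new test below), setting initial.maxvalue.ID to 1 makes the first generated statement "SELECT * FROM TEST_QUERY_DB_TABLE WHERE ID > 1 ORDER BY ID FETCH NEXT 10000 ROWS ONLY" instead of a full-table fetch.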
AbstractDatabaseFetchProcessor.java

@@ -21,6 +21,7 @@ import org.apache.nifi.components.PropertyDescriptor;
 import org.apache.nifi.components.ValidationContext;
 import org.apache.nifi.components.ValidationResult;
 import org.apache.nifi.dbcp.DBCPService;
+import org.apache.nifi.expression.AttributeExpression;
 import org.apache.nifi.processor.AbstractSessionFactoryProcessor;
 import org.apache.nifi.processor.ProcessContext;
 import org.apache.nifi.processor.Relationship;
@@ -82,6 +83,8 @@ import static java.sql.Types.VARCHAR;
  */
 public abstract class AbstractDatabaseFetchProcessor extends AbstractSessionFactoryProcessor {
 
+    public static final String INITIAL_MAX_VALUE_PROP_START = "initial.maxvalue.";
+
     // Relationships
     public static final Relationship REL_SUCCESS = new Relationship.Builder()
             .name("success")
@@ -165,6 +168,9 @@ public abstract class AbstractDatabaseFetchProcessor extends AbstractSessionFactoryProcessor {
 
     private static SimpleDateFormat TIME_TYPE_FORMAT = new SimpleDateFormat("HH:mm:ss.SSS");
 
+    // A Map (name to value) of initial maximum-value properties, filled at schedule-time and used at trigger-time
+    protected Map<String,String> maxValueProperties;
+
     static {
         // Load the DatabaseAdapters
         ArrayList<AllowableValue> dbAdapterValues = new ArrayList<>();
@@ -185,6 +191,18 @@ public abstract class AbstractDatabaseFetchProcessor extends AbstractSessionFactoryProcessor {
                 .build();
     }
 
+    @Override
+    protected PropertyDescriptor getSupportedDynamicPropertyDescriptor(final String propertyDescriptorName) {
+        return new PropertyDescriptor.Builder()
+                .name(propertyDescriptorName)
+                .required(false)
+                .addValidator(StandardValidators.createAttributeExpressionLanguageValidator(AttributeExpression.ResultType.STRING, true))
+                .addValidator(StandardValidators.ATTRIBUTE_KEY_PROPERTY_NAME_VALIDATOR)
+                .expressionLanguageSupported(true)
+                .dynamic(true)
+                .build();
+    }
+
     // A common validation procedure for DB fetch processors, it stores whether the Table Name and/or Max Value Column properties have expression language
     protected Collection<ValidationResult> customValidate(ValidationContext validationContext) {
         // For backwards-compatibility, keep track of whether the table name and max-value column properties are dynamic (i.e. has expression language)
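Note that this getSupportedDynamicPropertyDescriptor() override is lifted essentially verbatim from QueryDatabaseTable (its matching removal appears further down), so both processors now accept the same initial.maxvalue.* dynamic properties from the common base class.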
@@ -424,4 +442,20 @@ public abstract class AbstractDatabaseFetchProcessor extends AbstractSessionFactoryProcessor {
         }
         return sb.toString();
     }
+
+    protected Map<String,String> getDefaultMaxValueProperties(final Map<PropertyDescriptor, String> properties){
+        final Map<String,String> defaultMaxValues = new HashMap<>();
+
+        for (final Map.Entry<PropertyDescriptor, String> entry : properties.entrySet()) {
+            final String key = entry.getKey().getName();
+
+            if(!key.startsWith(INITIAL_MAX_VALUE_PROP_START)) {
+                continue;
+            }
+
+            defaultMaxValues.put(key.substring(INITIAL_MAX_VALUE_PROP_START.length()), entry.getValue());
+        }
+
+        return defaultMaxValues;
+    }
 }
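To make the prefix handling concrete, here is a minimal standalone sketch of what getDefaultMaxValueProperties() does, using plain String keys in place of NiFi PropertyDescriptors and hypothetical property values:

import java.util.HashMap;
import java.util.Map;

// Standalone sketch of the prefix-stripping in getDefaultMaxValueProperties(),
// with plain String keys standing in for NiFi PropertyDescriptors.
public class InitialMaxValueSketch {
    static final String INITIAL_MAX_VALUE_PROP_START = "initial.maxvalue.";

    public static void main(String[] args) {
        final Map<String, String> properties = new HashMap<>();
        properties.put("Table Name", "TEST_QUERY_DB_TABLE"); // no prefix: skipped
        properties.put("initial.maxvalue.ID", "1");          // prefix stripped: kept

        final Map<String, String> defaultMaxValues = new HashMap<>();
        for (final Map.Entry<String, String> entry : properties.entrySet()) {
            final String key = entry.getKey();
            if (!key.startsWith(INITIAL_MAX_VALUE_PROP_START)) {
                continue;
            }
            // "initial.maxvalue.ID" -> column name "ID"
            defaultMaxValues.put(key.substring(INITIAL_MAX_VALUE_PROP_START.length()), entry.getValue());
        }
        System.out.println(defaultMaxValues); // prints {ID=1}
    }
}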
@ -17,6 +17,7 @@
|
||||||
package org.apache.nifi.processors.standard;
|
package org.apache.nifi.processors.standard;
|
||||||
|
|
||||||
import org.apache.commons.lang3.StringUtils;
|
import org.apache.commons.lang3.StringUtils;
|
||||||
|
import org.apache.nifi.annotation.behavior.DynamicProperty;
|
||||||
import org.apache.nifi.annotation.behavior.InputRequirement;
|
import org.apache.nifi.annotation.behavior.InputRequirement;
|
||||||
import org.apache.nifi.annotation.behavior.InputRequirement.Requirement;
|
import org.apache.nifi.annotation.behavior.InputRequirement.Requirement;
|
||||||
import org.apache.nifi.annotation.behavior.Stateful;
|
import org.apache.nifi.annotation.behavior.Stateful;
|
||||||
@@ -94,6 +95,8 @@ import java.util.stream.IntStream;
         @WritesAttribute(attribute = "generatetablefetch.limit", description = "The number of result rows to be fetched by the SQL statement."),
         @WritesAttribute(attribute = "generatetablefetch.offset", description = "Offset to be used to retrieve the corresponding partition.")
 })
+@DynamicProperty(name = "Initial Max Value", value = "Attribute Expression Language", supportsExpressionLanguage = false, description = "Specifies an initial "
+        + "max value for max value columns. Properties should be added in the format `initial.maxvalue.{max_value_column}`.")
 public class GenerateTableFetch extends AbstractDatabaseFetchProcessor {
 
     public static final PropertyDescriptor PARTITION_SIZE = new PropertyDescriptor.Builder()
@@ -150,6 +153,7 @@ public class GenerateTableFetch extends AbstractDatabaseFetchProcessor {
     @Override
     @OnScheduled
     public void setup(final ProcessContext context) {
+        maxValueProperties = getDefaultMaxValueProperties(context.getProperties());
         // Pre-fetch the column types if using a static table name and max-value columns
         if (!isDynamicTableName && !isDynamicMaxValues) {
             super.setup(context);
@@ -198,6 +202,24 @@ public class GenerateTableFetch extends AbstractDatabaseFetchProcessor {
         // set as the current state map (after the session has been committed)
         final Map<String, String> statePropertyMap = new HashMap<>(stateMap.toMap());
 
+        // If an initial max value for column(s) has been specified using properties, and this column is not in the state manager, sync them to the state property map
+        for (final Map.Entry<String, String> maxProp : maxValueProperties.entrySet()) {
+            String maxPropKey = maxProp.getKey().toLowerCase();
+            String fullyQualifiedMaxPropKey = getStateKey(tableName, maxPropKey);
+            if (!statePropertyMap.containsKey(fullyQualifiedMaxPropKey)) {
+                String newMaxPropValue;
+                // If we can't find the value at the fully-qualified key name, it is possible (under a previous scheme)
+                // the value has been stored under a key that is only the column name. Fall back to check the column name,
+                // but store the new initial max value under the fully-qualified key.
+                if (statePropertyMap.containsKey(maxPropKey)) {
+                    newMaxPropValue = statePropertyMap.get(maxPropKey);
+                } else {
+                    newMaxPropValue = maxProp.getValue();
+                }
+                statePropertyMap.put(fullyQualifiedMaxPropKey, newMaxPropValue);
+            }
+        }
+
         // Build a WHERE clause with maximum-value columns (if they exist), and a list of column names that will contain MAX(<column>) aliases. The
         // executed SQL query will retrieve the count of all records after the filter(s) have been applied, as well as the new maximum values for the
         // specified columns. This allows the processor to generate the correctly partitioned SQL statements as well as to update the state with the
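The seeding precedence in that loop is easy to miss, so here is a minimal standalone sketch of it using plain maps. The getStateKey() stand-in and its key format are illustrative assumptions; the real implementation lives in AbstractDatabaseFetchProcessor.

import java.util.HashMap;
import java.util.Map;

// Standalone sketch of the seeding precedence above (hypothetical key format):
// 1. fully-qualified key already in state  -> initial.maxvalue.* property is ignored
// 2. legacy column-only key in state       -> value migrated to the fully-qualified key
// 3. neither in state                      -> initial.maxvalue.* property value is used
public class SeedStateSketch {

    // Stand-in for the base class's getStateKey(table, column); the real
    // delimiter/format is defined in AbstractDatabaseFetchProcessor.
    static String getStateKey(String table, String column) {
        return table.toLowerCase() + "@!@" + column.toLowerCase();
    }

    public static void main(String[] args) {
        final Map<String, String> statePropertyMap = new HashMap<>();
        statePropertyMap.put("id", "2"); // legacy, column-only key from an older scheme

        final Map<String, String> maxValueProperties = new HashMap<>();
        maxValueProperties.put("ID", "1"); // from an initial.maxvalue.ID property

        final String tableName = "TEST_QUERY_DB_TABLE";
        for (final Map.Entry<String, String> maxProp : maxValueProperties.entrySet()) {
            final String maxPropKey = maxProp.getKey().toLowerCase();
            final String fullyQualifiedMaxPropKey = getStateKey(tableName, maxPropKey);
            if (!statePropertyMap.containsKey(fullyQualifiedMaxPropKey)) {
                // A legacy value (if any) wins over the property value.
                final String newMaxPropValue = statePropertyMap.getOrDefault(maxPropKey, maxProp.getValue());
                statePropertyMap.put(fullyQualifiedMaxPropKey, newMaxPropValue);
            }
        }
        // The legacy "2" was migrated; the property value "1" was not used.
        System.out.println(statePropertyMap.get(getStateKey(tableName, "ID"))); // prints 2
    }
}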
@ -33,7 +33,6 @@ import org.apache.nifi.components.state.Scope;
|
||||||
import org.apache.nifi.components.state.StateManager;
|
import org.apache.nifi.components.state.StateManager;
|
||||||
import org.apache.nifi.components.state.StateMap;
|
import org.apache.nifi.components.state.StateMap;
|
||||||
import org.apache.nifi.dbcp.DBCPService;
|
import org.apache.nifi.dbcp.DBCPService;
|
||||||
import org.apache.nifi.expression.AttributeExpression;
|
|
||||||
import org.apache.nifi.flowfile.FlowFile;
|
import org.apache.nifi.flowfile.FlowFile;
|
||||||
import org.apache.nifi.flowfile.attributes.CoreAttributes;
|
import org.apache.nifi.flowfile.attributes.CoreAttributes;
|
||||||
import org.apache.nifi.logging.ComponentLog;
|
import org.apache.nifi.logging.ComponentLog;
|
||||||
@@ -110,8 +109,6 @@ public class QueryDatabaseTable extends AbstractDatabaseFetchProcessor {
 
     public static final String RESULT_TABLENAME = "tablename";
     public static final String RESULT_ROW_COUNT = "querydbtable.row.count";
-    public static final String INTIIAL_MAX_VALUE_PROP_START = "initial.maxvalue.";
-
 
     public static final PropertyDescriptor FETCH_SIZE = new PropertyDescriptor.Builder()
             .name("Fetch Size")
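Incidentally, the constant removed here was spelled INTIIAL_MAX_VALUE_PROP_START (a transposed-letter typo in the old QueryDatabaseTable code); its replacement in the base class, INITIAL_MAX_VALUE_PROP_START, fixes the spelling while keeping the same "initial.maxvalue." value.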
@@ -177,20 +174,9 @@ public class QueryDatabaseTable extends AbstractDatabaseFetchProcessor {
         return propDescriptors;
     }
 
-    @Override
-    protected PropertyDescriptor getSupportedDynamicPropertyDescriptor(final String propertyDescriptorName) {
-        return new PropertyDescriptor.Builder()
-                .name(propertyDescriptorName)
-                .required(false)
-                .addValidator(StandardValidators.createAttributeExpressionLanguageValidator(AttributeExpression.ResultType.STRING, true))
-                .addValidator(StandardValidators.ATTRIBUTE_KEY_PROPERTY_NAME_VALIDATOR)
-                .expressionLanguageSupported(true)
-                .dynamic(true)
-                .build();
-    }
-
     @OnScheduled
     public void setup(final ProcessContext context) {
+        maxValueProperties = getDefaultMaxValueProperties(context.getProperties());
         super.setup(context);
     }
 
@@ -220,8 +206,6 @@ public class QueryDatabaseTable extends AbstractDatabaseFetchProcessor {
                 .defaultScale(context.getProperty(DEFAULT_SCALE).evaluateAttributeExpressions().asInteger())
                 .build();
 
-        final Map<String,String> maxValueProperties = getDefaultMaxValueProperties(context.getProperties());
-
         final StateManager stateManager = context.getStateManager();
         final StateMap stateMap;
 
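Net effect for QueryDatabaseTable: the maxValueProperties map is now built once in the @OnScheduled setup() method and kept in the inherited field, rather than being rebuilt from the property map on every onTrigger() call.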
@@ -417,23 +401,6 @@ public class QueryDatabaseTable extends AbstractDatabaseFetchProcessor {
         return query.toString();
     }
 
-
-    protected Map<String,String> getDefaultMaxValueProperties(final Map<PropertyDescriptor, String> properties){
-        final Map<String,String> defaultMaxValues = new HashMap<>();
-
-        for (final Map.Entry<PropertyDescriptor, String> entry : properties.entrySet()) {
-            final String key = entry.getKey().getName();
-
-            if(!key.startsWith(INTIIAL_MAX_VALUE_PROP_START)) {
-                continue;
-            }
-
-            defaultMaxValues.put(key.substring(INTIIAL_MAX_VALUE_PROP_START.length()), entry.getValue());
-        }
-
-        return defaultMaxValues;
-    }
-
     protected class MaxValueResultSetRowCollector implements JdbcCommon.ResultSetRowCallback {
         DatabaseAdapter dbAdapter;
         Map<String, String> newColMap;
TestGenerateTableFetch.java

@@ -584,6 +584,75 @@ public class TestGenerateTableFetch {
         runner.clearTransferState();
     }
 
+    @Test
+    public void testInitialMaxValue() throws ClassNotFoundException, SQLException, InitializationException, IOException {
+
+        // load test data to database
+        final Connection con = ((DBCPService) runner.getControllerService("dbcp")).getConnection();
+        Statement stmt = con.createStatement();
+
+        try {
+            stmt.execute("drop table TEST_QUERY_DB_TABLE");
+        } catch (final SQLException sqle) {
+            // Ignore this error, probably a "table does not exist" since Derby doesn't yet support DROP IF EXISTS [DERBY-4842]
+        }
+
+        stmt.execute("create table TEST_QUERY_DB_TABLE (id integer not null, name varchar(100), scale float, created_on timestamp, bignum bigint default 0)");
+        stmt.execute("insert into TEST_QUERY_DB_TABLE (id, name, scale, created_on) VALUES (0, 'Joe Smith', 1.0, '1962-09-23 03:23:34.234')");
+        stmt.execute("insert into TEST_QUERY_DB_TABLE (id, name, scale, created_on) VALUES (1, 'Carrie Jones', 5.0, '2000-01-01 03:23:34.234')");
+        stmt.execute("insert into TEST_QUERY_DB_TABLE (id, name, scale, created_on) VALUES (2, NULL, 2.0, '2010-01-01 00:00:00')");
+
+        runner.setProperty(GenerateTableFetch.TABLE_NAME, "TEST_QUERY_DB_TABLE");
+        runner.setIncomingConnection(false);
+        runner.setProperty(GenerateTableFetch.MAX_VALUE_COLUMN_NAMES, "ID");
+        runner.setProperty("initial.maxvalue.ID", "1");
+
+        runner.run();
+        runner.assertAllFlowFilesTransferred(REL_SUCCESS, 1);
+        MockFlowFile flowFile = runner.getFlowFilesForRelationship(REL_SUCCESS).get(0);
+        String query = new String(flowFile.toByteArray());
+        assertEquals("SELECT * FROM TEST_QUERY_DB_TABLE WHERE ID > 1 ORDER BY ID FETCH NEXT 10000 ROWS ONLY", query);
+        ResultSet resultSet = stmt.executeQuery(query);
+        // Should be one record (the initial max value skips the first two)
+        assertTrue(resultSet.next());
+        assertFalse(resultSet.next());
+        runner.clearTransferState();
+
+        // Run again, this time no flowfiles/rows should be transferred
+        runner.run();
+        runner.assertAllFlowFilesTransferred(REL_SUCCESS, 0);
+        runner.clearTransferState();
+
+        // Add 3 new rows with a higher ID and run with a partition size of 2. Two flow files should be transferred
+        stmt.execute("insert into TEST_QUERY_DB_TABLE (id, name, scale, created_on) VALUES (3, 'Mary West', 15.0, '2000-01-01 03:23:34.234')");
+        stmt.execute("insert into TEST_QUERY_DB_TABLE (id, name, scale, created_on) VALUES (4, 'Marty Johnson', 15.0, '2011-01-01 03:23:34.234')");
+        stmt.execute("insert into TEST_QUERY_DB_TABLE (id, name, scale, created_on) VALUES (5, 'Marty Johnson', 15.0, '2011-01-01 03:23:34.234')");
+        runner.setProperty(GenerateTableFetch.PARTITION_SIZE, "2");
+        runner.setProperty("initial.maxvalue.ID", "5"); // This should have no effect as there is a max value in the processor state
+        runner.run();
+        runner.assertAllFlowFilesTransferred(REL_SUCCESS, 2);
+
+        // Verify first flow file's contents
+        flowFile = runner.getFlowFilesForRelationship(REL_SUCCESS).get(0);
+        query = new String(flowFile.toByteArray());
+        assertEquals("SELECT * FROM TEST_QUERY_DB_TABLE WHERE ID > 2 ORDER BY ID FETCH NEXT 2 ROWS ONLY", query);
+        resultSet = stmt.executeQuery(query);
+        // Should be two records
+        assertTrue(resultSet.next());
+        assertTrue(resultSet.next());
+        assertFalse(resultSet.next());
+
+        // Verify second flow file's contents
+        flowFile = runner.getFlowFilesForRelationship(REL_SUCCESS).get(1);
+        query = new String(flowFile.toByteArray());
+        assertEquals("SELECT * FROM TEST_QUERY_DB_TABLE WHERE ID > 2 ORDER BY ID OFFSET 2 ROWS FETCH NEXT 2 ROWS ONLY", query);
+        resultSet = stmt.executeQuery(query);
+        // Should be one record
+        assertTrue(resultSet.next());
+        assertFalse(resultSet.next());
+        runner.clearTransferState();
+    }
+
 
     /**
      * Simple implementation only for GenerateTableFetch processor testing.
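The test pins down the key contract: the first run honors initial.maxvalue.ID = 1 (the WHERE ID > 1 query returns only the row with ID 2, which then becomes the stored max value), while the later initial.maxvalue.ID = 5 is ignored because state already holds a max value for that column, so the partitioned queries still use WHERE ID > 2.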