Add threading to migrator

This commit is contained in:
jamesagnew 2018-10-31 05:50:50 -04:00 committed by Eeva Turkka
parent e6a62a10a2
commit b2479e4a7b
9 changed files with 319 additions and 151 deletions

View File

@ -1,5 +1,25 @@
package ca.uhn.fhir.rest.api;
/*-
* #%L
* HAPI FHIR - Core Library
* %%
* Copyright (C) 2014 - 2018 University Health Network
* %%
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* #L%
*/
import java.util.HashMap;
import java.util.Map;

View File

@ -31,6 +31,10 @@
<groupId>org.springframework</groupId>
<artifactId>spring-jdbc</artifactId>
</dependency>
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-dbcp2</artifactId>
</dependency>
<!-- This dependency includes the core HAPI-FHIR classes -->
<dependency>
@ -45,11 +49,6 @@
<artifactId>derby</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-dbcp2</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>

View File

@ -1,6 +1,7 @@
package ca.uhn.fhir.jpa.migrate;
import ca.uhn.fhir.rest.server.exceptions.InternalErrorException;
import org.apache.commons.dbcp2.BasicDataSource;
import org.apache.commons.lang3.Validate;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@ -76,20 +77,13 @@ public enum DriverTypeEnum {
throw new InternalErrorException("Unable to find driver class: " + myDriverClassName, e);
}
SingleConnectionDataSource dataSource = new SingleConnectionDataSource(){
@Override
protected Connection getConnectionFromDriver(Properties props) throws SQLException {
Connection connect = driver.connect(theUrl, props);
assert connect != null;
return connect;
}
};
dataSource.setAutoCommit(false);
BasicDataSource dataSource = new BasicDataSource();
// dataSource.setAutoCommit(false);
dataSource.setDriverClassName(myDriverClassName);
dataSource.setUrl(theUrl);
dataSource.setUsername(theUsername);
dataSource.setPassword(thePassword);
dataSource.setSuppressClose(true);
// dataSource.setSuppressClose(true);
DataSourceTransactionManager transactionManager = new DataSourceTransactionManager();
transactionManager.setDataSource(dataSource);

View File

@ -23,15 +23,21 @@ package ca.uhn.fhir.jpa.migrate.taskdef;
import ca.uhn.fhir.util.StopWatch;
import com.google.common.collect.ForwardingMap;
import org.apache.commons.lang3.Validate;
import org.apache.commons.lang3.concurrent.BasicThreadFactory;
import org.checkerframework.checker.nullness.compatqual.NullableDecl;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.jdbc.core.ColumnMapRowMapper;
import org.springframework.jdbc.core.JdbcTemplate;
import org.springframework.jdbc.core.RowCallbackHandler;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.*;
import java.util.function.Function;
public class CalculateHashesTask extends BaseTableColumnTask<CalculateHashesTask> {
@ -39,75 +45,147 @@ public class CalculateHashesTask extends BaseTableColumnTask<CalculateHashesTask
private static final Logger ourLog = LoggerFactory.getLogger(CalculateHashesTask.class);
private int myBatchSize = 10000;
private Map<String, Function<MandatoryKeyMap<String, Object>, Long>> myCalculators = new HashMap<>();
private ThreadPoolExecutor myExecutor;
public void setBatchSize(int theBatchSize) {
myBatchSize = theBatchSize;
}
/**
* Constructor
*/
public CalculateHashesTask() {
super();
}
@Override
public void execute() {
public synchronized void execute() throws SQLException {
if (isDryRun()) {
return;
}
List<Map<String, Object>> rows;
do {
rows = getTxTemplate().execute(t -> {
JdbcTemplate jdbcTemplate = newJdbcTemnplate();
jdbcTemplate.setMaxRows(myBatchSize);
String sql = "SELECT * FROM " + getTableName() + " WHERE " + getColumnName() + " IS NULL";
ourLog.info("Finding up to {} rows in {} that requires hashes", myBatchSize, getTableName());
return jdbcTemplate.queryForList(sql);
});
initializeExecutor();
try {
updateRows(rows);
} while (rows.size() > 0);
}
while(true) {
MyRowCallbackHandler rch = new MyRowCallbackHandler();
getTxTemplate().execute(t -> {
JdbcTemplate jdbcTemplate = newJdbcTemnplate();
jdbcTemplate.setMaxRows(100000);
String sql = "SELECT * FROM " + getTableName() + " WHERE " + getColumnName() + " IS NULL";
ourLog.info("Finding up to {} rows in {} that requires hashes", myBatchSize, getTableName());
private void updateRows(List<Map<String, Object>> theRows) {
StopWatch sw = new StopWatch();
getTxTemplate().execute(t -> {
jdbcTemplate.query(sql, rch);
rch.done();
// Loop through rows
assert theRows != null;
for (Map<String, Object> nextRow : theRows) {
return null;
});
Map<String, Long> newValues = new HashMap<>();
MandatoryKeyMap<String, Object> nextRowMandatoryKeyMap = new MandatoryKeyMap<>(nextRow);
// Apply calculators
for (Map.Entry<String, Function<MandatoryKeyMap<String, Object>, Long>> nextCalculatorEntry : myCalculators.entrySet()) {
String nextColumn = nextCalculatorEntry.getKey();
Function<MandatoryKeyMap<String, Object>, Long> nextCalculator = nextCalculatorEntry.getValue();
Long value = nextCalculator.apply(nextRowMandatoryKeyMap);
newValues.put(nextColumn, value);
rch.submitNext();
List<Future<?>> futures = rch.getFutures();
if (futures.isEmpty()) {
break;
}
// Generate update SQL
StringBuilder sqlBuilder = new StringBuilder();
List<Long> arguments = new ArrayList<>();
sqlBuilder.append("UPDATE ");
sqlBuilder.append(getTableName());
sqlBuilder.append(" SET ");
for (Map.Entry<String, Long> nextNewValueEntry : newValues.entrySet()) {
if (arguments.size() > 0) {
sqlBuilder.append(", ");
ourLog.info("Waiting for {} tasks to complete", futures.size());
for (Future<?> next : futures) {
try {
next.get();
} catch (Exception e) {
throw new SQLException(e);
}
sqlBuilder.append(nextNewValueEntry.getKey()).append(" = ?");
arguments.add(nextNewValueEntry.getValue());
}
sqlBuilder.append(" WHERE SP_ID = ?");
arguments.add((Long) nextRow.get("SP_ID"));
// Apply update SQL
newJdbcTemnplate().update(sqlBuilder.toString(), arguments.toArray());
}
return theRows.size();
});
ourLog.info("Updated {} rows on {} in {}", theRows.size(), getTableName(), sw.toString());
} finally {
destroyExecutor();
}
}
private void destroyExecutor() {
myExecutor.shutdownNow();
}
private void initializeExecutor() {
int maximumPoolSize = Runtime.getRuntime().availableProcessors();
LinkedBlockingQueue<Runnable> executorQueue = new LinkedBlockingQueue<>(maximumPoolSize);
BasicThreadFactory threadFactory = new BasicThreadFactory.Builder()
.namingPattern("worker-" + "-%d")
.daemon(false)
.priority(Thread.NORM_PRIORITY)
.build();
RejectedExecutionHandler rejectedExecutionHandler = new RejectedExecutionHandler() {
@Override
public void rejectedExecution(Runnable theRunnable, ThreadPoolExecutor theExecutor) {
ourLog.info("Note: Executor queue is full ({} elements), waiting for a slot to become available!", executorQueue.size());
StopWatch sw = new StopWatch();
try {
executorQueue.put(theRunnable);
} catch (InterruptedException theE) {
throw new RejectedExecutionException("Task " + theRunnable.toString() +
" rejected from " + theE.toString());
}
ourLog.info("Slot become available after {}ms", sw.getMillis());
}
};
myExecutor = new ThreadPoolExecutor(
1,
maximumPoolSize,
0L,
TimeUnit.MILLISECONDS,
executorQueue,
threadFactory,
rejectedExecutionHandler);
}
private Future<?> updateRows(List<Map<String, Object>> theRows) {
Runnable task = () -> {
StopWatch sw = new StopWatch();
getTxTemplate().execute(t -> {
// Loop through rows
assert theRows != null;
for (Map<String, Object> nextRow : theRows) {
Map<String, Long> newValues = new HashMap<>();
MandatoryKeyMap<String, Object> nextRowMandatoryKeyMap = new MandatoryKeyMap<>(nextRow);
// Apply calculators
for (Map.Entry<String, Function<MandatoryKeyMap<String, Object>, Long>> nextCalculatorEntry : myCalculators.entrySet()) {
String nextColumn = nextCalculatorEntry.getKey();
Function<MandatoryKeyMap<String, Object>, Long> nextCalculator = nextCalculatorEntry.getValue();
Long value = nextCalculator.apply(nextRowMandatoryKeyMap);
newValues.put(nextColumn, value);
}
// Generate update SQL
StringBuilder sqlBuilder = new StringBuilder();
List<Long> arguments = new ArrayList<>();
sqlBuilder.append("UPDATE ");
sqlBuilder.append(getTableName());
sqlBuilder.append(" SET ");
for (Map.Entry<String, Long> nextNewValueEntry : newValues.entrySet()) {
if (arguments.size() > 0) {
sqlBuilder.append(", ");
}
sqlBuilder.append(nextNewValueEntry.getKey()).append(" = ?");
arguments.add(nextNewValueEntry.getValue());
}
sqlBuilder.append(" WHERE SP_ID = ?");
arguments.add((Long) nextRow.get("SP_ID"));
// Apply update SQL
newJdbcTemnplate().update(sqlBuilder.toString(), arguments.toArray());
}
return theRows.size();
});
ourLog.info("Updated {} rows on {} in {}", theRows.size(), getTableName(), sw.toString());
};
return myExecutor.submit(task);
}
public CalculateHashesTask addCalculator(String theColumnName, Function<MandatoryKeyMap<String, Object>, Long> theConsumer) {
@ -116,6 +194,39 @@ public class CalculateHashesTask extends BaseTableColumnTask<CalculateHashesTask
return this;
}
private class MyRowCallbackHandler implements RowCallbackHandler {
private List<Map<String, Object>> myRows = new ArrayList<>();
private List<Future<?>> myFutures = new ArrayList<>();
@Override
public void processRow(ResultSet rs) throws SQLException {
Map<String, Object> row = new ColumnMapRowMapper().mapRow(rs, 0);
myRows.add(row);
if (myRows.size() >= myBatchSize) {
submitNext();
}
}
private void submitNext() {
if (myRows.size() > 0) {
myFutures.add(updateRows(myRows));
myRows = new ArrayList<>();
}
}
public List<Future<?>> getFutures() {
return myFutures;
}
public void done() {
if (myRows.size() > 0) {
submitNext();
}
}
}
public static class MandatoryKeyMap<K, V> extends ForwardingMap<K, V> {

View File

@ -9,7 +9,7 @@ import java.util.Map;
import static org.junit.Assert.assertEquals;
public class CreateHashesTest extends BaseTest {
public class CalculateHashesTest extends BaseTest {
@Test
public void testCreateHashes() {
@ -50,4 +50,36 @@ public class CreateHashesTest extends BaseTest {
});
}
@Test
public void testCreateHashesLargeNumber() {
executeSql("create table HFJ_SPIDX_TOKEN (SP_ID bigint not null, SP_MISSING boolean, SP_NAME varchar(100) not null, RES_ID bigint, RES_TYPE varchar(255) not null, SP_UPDATED timestamp, HASH_IDENTITY bigint, HASH_SYS bigint, HASH_SYS_AND_VALUE bigint, HASH_VALUE bigint, SP_SYSTEM varchar(200), SP_VALUE varchar(200), primary key (SP_ID))");
for (int i = 0; i < 777; i++) {
executeSql("insert into HFJ_SPIDX_TOKEN (SP_MISSING, SP_NAME, RES_ID, RES_TYPE, SP_UPDATED, SP_SYSTEM, SP_VALUE, SP_ID) values (false, 'identifier', 999, 'Patient', '2018-09-03 07:44:49.196', 'urn:oid:1.2.410.100110.10.41308301', '8888888" + i + "', " + i + ")");
}
Long count = getConnectionProperties().getTxTemplate().execute(t -> {
JdbcTemplate jdbcTemplate = getConnectionProperties().newJdbcTemplate();
return jdbcTemplate.queryForObject("SELECT count(*) FROM HFJ_SPIDX_TOKEN WHERE HASH_VALUE IS NULL", Long.class);
});
assertEquals(777L, count.longValue());
CalculateHashesTask task = new CalculateHashesTask();
task.setTableName("HFJ_SPIDX_TOKEN");
task.setColumnName("HASH_IDENTITY");
task.addCalculator("HASH_IDENTITY", t -> BaseResourceIndexedSearchParam.calculateHashIdentity(t.getResourceType(), t.getString("SP_NAME")));
task.addCalculator("HASH_SYS", t -> ResourceIndexedSearchParamToken.calculateHashSystem(t.getResourceType(), t.getParamName(), t.getString("SP_SYSTEM")));
task.addCalculator("HASH_SYS_AND_VALUE", t -> ResourceIndexedSearchParamToken.calculateHashSystemAndValue(t.getResourceType(), t.getParamName(), t.getString("SP_SYSTEM"), t.getString("SP_VALUE")));
task.addCalculator("HASH_VALUE", t -> ResourceIndexedSearchParamToken.calculateHashValue(t.getResourceType(), t.getParamName(), t.getString("SP_VALUE")));
task.setBatchSize(3);
getMigrator().addTask(task);
getMigrator().migrate();
count = getConnectionProperties().getTxTemplate().execute(t -> {
JdbcTemplate jdbcTemplate = getConnectionProperties().newJdbcTemplate();
return jdbcTemplate.queryForObject("SELECT count(*) FROM HFJ_SPIDX_TOKEN WHERE HASH_VALUE IS NULL", Long.class);
});
assertEquals(0L, count.longValue());
}
}

View File

@ -1727,6 +1727,12 @@
<copy todir="target/site/apidocs-jpaserver">
<fileset dir="hapi-fhir-jpaserver-base/target/site/apidocs"/>
</copy>
<copy todir="target/site/apidocs-client">
<fileset dir="hapi-fhir-client/target/site/apidocs"/>
</copy>
<copy todir="target/site/apidocs-server">
<fileset dir="hapi-fhir-server/target/site/apidocs"/>
</copy>
<copy todir="target/site/xref-jpaserver">
<fileset dir="hapi-fhir-jpaserver-base/target/site/xref"/>
</copy>
@ -2131,6 +2137,8 @@
<module>hapi-fhir-structures-dstu2</module>
<module>hapi-fhir-structures-dstu3</module>
<module>hapi-fhir-structures-r4</module>
<module>hapi-fhir-client</module>
<module>hapi-fhir-server</module>
<module>hapi-fhir-jpaserver-base</module>
<module>hapi-fhir-jaxrsserver-base</module>
<!-- <module>hapi-fhir-cobertura</module> -->

View File

@ -115,6 +115,8 @@
<item name="Model API (DSTU2)" href="./apidocs-dstu2/index.html" />
<item name="Model API (DSTU3)" href="./apidocs-dstu3/index.html" />
<item name="Model API (R4)" href="./apidocs-r4/index.html" />
<item name="Client API" href="./apidocs-client/index.html" />
<item name="Server API" href="./apidocs-server/index.html" />
<item name="JPA Server API" href="./apidocs-jpaserver/index.html" />
</item>
<item name="Command Line Tool (hapi-fhir-cli)" href="./doc_cli.html" />

View File

@ -70,6 +70,8 @@
<li><a href="./apidocs-dstu/index.html">Model API (DSTU1)</a></li>
<li><a href="./apidocs-dstu2/index.html" >Model API (DSTU2)</a></li>
<li><a href="./apidocs-dstu3/index.html" >Model API (STU3)</a></li>
<li><a href="./apidocs-client/index.html" >Client API</a></li>
<li><a href="./apidocs-server/index.html" >Server API</a></li>
<li><a href="./apidocs-jpaserver/index.html">JPA Server API</a></li>
</ul>