Break valueset concept validation into multiple smaller batches for performance reasons

This commit is contained in:
Grahame Grieve 2024-09-30 08:32:49 +10:00
parent ae7b962d29
commit ef6a398ebe
2 changed files with 69 additions and 31 deletions

View File

@ -181,7 +181,7 @@ public class StructureMapValidator extends BaseValidator {
}
/**
 * Builds a one-line description of this variable for diagnostic messages.
 *
 * @return the text {@code name : workingType (mode)}, where the working type comes from {@link #getWorkingType()}
 */
public String summary() {
  // Only one return may remain: a second statement after a return is unreachable and is rejected by javac.
  return name+" : "+getWorkingType()+" ("+mode+")";
}
public boolean matches(VariableDefn other) {
@ -215,6 +215,13 @@ public class StructureMapValidator extends BaseValidator {
}
}
/**
 * Copies the type-related fields of another variable definition onto this one.
 * Only {@code type}, {@code sd}, {@code ed} and {@code max} are copied here.
 *
 * @param source the definition to copy from; must not be null
 */
public void copyType(VariableDefn source) {
  type = source.type;
  sd = source.sd;
  ed = source.ed;
  max = source.max;
}
}
public class VariableSet {
@ -332,6 +339,9 @@ public class StructureMapValidator extends BaseValidator {
public boolean validateStructureMap(ValidationContext valContext, List<ValidationMessage> errors, Element src, NodeStack stack) {
boolean ok = true;
if ("http://smart.who.int/immunizations-measles/StructureMap/IMMZCQRToLM".equals(src.getNamedChildValue("url"))) {
DebugUtilities.breakpoint();
}
List<Element> imports = src.getChildrenByName("import");
int cc = 0;
for (Element import_ : imports) {
@ -649,6 +659,20 @@ public class StructureMapValidator extends BaseValidator {
}
// check condition
// check check
} else {
String variable = source.getChildValue("variable");
VariableDefn vn = null;
if (hint(errors, "2023-03-01", IssueType.INVALID, source.line(), source.col(), stack.getLiteralPath(), variable != null, I18nConstants.SM_RULE_SOURCE_UNASSIGNED)) {
if (rule(errors, "2023-03-01", IssueType.INVALID, source.line(), source.col(), stack.getLiteralPath(), idIsValid(variable), I18nConstants.SM_NAME_INVALID, variable)) {
vn = variables.add(variable, v.getMode()); // may overwrite
vn.copyType(v);
if (loopCounter == 0) {
ruleInfo.setDefVariable(variable);
}
} else {
ok = false;
}
}
}
} else {
String variable = source.getChildValue("variable");
@ -1237,7 +1261,10 @@ public class StructureMapValidator extends BaseValidator {
// target can transition to the source
v = getParameter(errors, param, pstack, variables, StructureMapInputMode.TARGET);
}
if (rule(errors, "2023-06-27", IssueType.INVALID, param.line(), param.col(), pstack.getLiteralPath(), v != null, I18nConstants.SM_DEPENDENT_PARAM_NOT_FOUND, pname, input.getMode().toCode())) {
if (v == null) {
DebugUtilities.breakpoint();
}
if (rule(errors, "2023-06-27", IssueType.INVALID, param.line(), param.col(), pstack.getLiteralPath(), v != null, I18nConstants.SM_DEPENDENT_PARAM_NOT_FOUND, pname, input.getMode().toCode(), variables.summary())) {
if (rule(errors, "2023-03-01", IssueType.INVALID, param.line(), param.col(), pstack.getLiteralPath(),
v.mode.equals(input.getMode().toCode()) || (v.mode.equals("target") && input.getMode() == StructureMapInputMode.SOURCE), I18nConstants.SM_DEPENDENT_PARAM_MODE_MISMATCH, param.getChildValue("name"), v.mode, input.getMode().toCode(), grp.getTargetGroup().getName()) &&
rule(errors, "2023-03-01", IssueType.INVALID, param.line(), param.col(), pstack.getLiteralPath(), typesMatch(v, iType), I18nConstants.SM_DEPENDENT_PARAM_TYPE_MISMATCH,

View File

@ -128,6 +128,7 @@ public class ValueSetValidator extends BaseValidator {
}
private static final int TOO_MANY_CODES_TO_VALIDATE = 1000;
private static final int VALIDATION_BATCH_SIZE = 300;
private CodeSystemChecker getSystemValidator(String system, List<ValidationMessage> errors) {
if (system == null) {
@ -319,36 +320,26 @@ public class ValueSetValidator extends BaseValidator {
int cc = 0;
List<VSCodingValidationRequest> batch = new ArrayList<>();
boolean first = true;
for (Element concept : concepts) {
// we treat the first differently because we want to know if the system is worth validating. if it is, then we batch the rest
if (first) {
systemOk = validateValueSetIncludeConcept(errors, concept, stack, stack.push(concept, cc, null, null), system, version, csChecker);
first = false;
} else if (systemOk) {
batch.add(prepareValidateValueSetIncludeConcept(errors, concept, stack.push(concept, cc, null, null), system, version, csChecker));
}
cc++;
}
if (((InstanceValidator) parent).isValidateValueSetCodesOnTxServer() && batch.size() > 0 & !context.isNoTerminologyServer()) {
if (batch.size() > TOO_MANY_CODES_TO_VALIDATE) {
ok = hint(errors, "2023-09-06", IssueType.BUSINESSRULE, stack, false, I18nConstants.VALUESET_INC_TOO_MANY_CODES, batch.size()) && ok;
} else {
long t = System.currentTimeMillis();
if (parent.isDebug()) {
System.out.println(" : Validate "+batch.size()+" codes from "+system+" for "+vsid);
}
if (concepts.size() > TOO_MANY_CODES_TO_VALIDATE) {
hint(errors, "2023-09-06", IssueType.BUSINESSRULE, stack, false, I18nConstants.VALUESET_INC_TOO_MANY_CODES, batch.size());
} else {
if (((InstanceValidator) parent).isValidateValueSetCodesOnTxServer() && !context.isNoTerminologyServer()) {
try {
context.validateCodeBatch(ValidationOptions.defaults().withExampleOK(), batch, null);
if (parent.isDebug()) {
System.out.println(" : .. "+(System.currentTimeMillis()-t)+"ms");
}
for (VSCodingValidationRequest cv : batch) {
if (version == null) {
warningOrHint(errors, NO_RULE_DATE, IssueType.BUSINESSRULE, cv.getStack().getLiteralPath(), cv.getResult().isOk(), !retired, I18nConstants.VALUESET_INCLUDE_INVALID_CONCEPT_CODE, system, cv.getCoding().getCode(), cv.getResult().getMessage());
} else {
warningOrHint(errors, NO_RULE_DATE, IssueType.BUSINESSRULE, cv.getStack().getLiteralPath(), cv.getResult().isOk(), !retired, I18nConstants.VALUESET_INCLUDE_INVALID_CONCEPT_CODE_VER, system, version, cv.getCoding().getCode(), cv.getResult().getMessage());
for (Element concept : concepts) {
// we treat the first differently because we want to know if the system is worth validating. if it is, then we batch the rest
if (first) {
systemOk = validateValueSetIncludeConcept(errors, concept, stack, stack.push(concept, cc, null, null), system, version, csChecker);
first = false;
} else if (systemOk) {
batch.add(prepareValidateValueSetIncludeConcept(errors, concept, stack.push(concept, cc, null, null), system, version, csChecker));
if (batch.size() > VALIDATION_BATCH_SIZE) {
executeValidationBatch(errors, vsid, retired, system, version, batch);
batch.clear();
}
}
}
cc++;
}
executeValidationBatch(errors, vsid, retired, system, version, batch);
} catch (Exception e) {
ok = false;
VSCodingValidationRequest cv = batch.get(0);
@ -356,7 +347,6 @@ public class ValueSetValidator extends BaseValidator {
}
}
}
int cf = 0;
for (Element filter : filters) {
ok = validateValueSetIncludeFilter(errors, filter, stack.push(filter, cf, null, null), system, version, cs, csChecker) & ok;
@ -369,6 +359,27 @@ public class ValueSetValidator extends BaseValidator {
return ok;
}
/**
 * Validates one batch of prepared concept requests and records an issue for each result.
 *
 * <p>An empty batch is a no-op. Otherwise the whole batch is handed to
 * {@code context.validateCodeBatch} in a single call, and afterwards every request's
 * result is reported through {@code warningOrHint}. When the parent validator is in
 * debug mode, the batch size and the elapsed time are printed to standard output.
 *
 * @param errors  the message list that reported issues are added to
 * @param vsid    identifier of the value set, used only in the debug output
 * @param retired passed negated to {@code warningOrHint}; presumably selects warning vs hint - confirm against that method
 * @param system  the code system the concepts belong to
 * @param version the code system version, or null; selects which message template is used
 * @param batch   the prepared validation requests; must not be null
 */
private void executeValidationBatch(List<ValidationMessage> errors, String vsid, boolean retired, String system,
    String version, List<VSCodingValidationRequest> batch) {
  if (batch.isEmpty()) {
    return;
  }

  final long startedAt = System.currentTimeMillis();
  if (parent.isDebug()) {
    System.out.println(" : Validate "+batch.size()+" codes from "+system+" for "+vsid);
  }
  context.validateCodeBatch(ValidationOptions.defaults().withExampleOK(), batch, null);
  if (parent.isDebug()) {
    final long elapsed = System.currentTimeMillis() - startedAt;
    System.out.println(" : .. "+elapsed+"ms");
  }

  final boolean notRetired = !retired;
  for (VSCodingValidationRequest request : batch) {
    // Read the pieces in the same order the reporting call consumes them.
    final String path = request.getStack().getLiteralPath();
    final boolean codeOk = request.getResult().isOk();
    final String code = request.getCoding().getCode();
    final String message = request.getResult().getMessage();
    if (version != null) {
      warningOrHint(errors, NO_RULE_DATE, IssueType.BUSINESSRULE, path, codeOk, notRetired, I18nConstants.VALUESET_INCLUDE_INVALID_CONCEPT_CODE_VER, system, version, code, message);
    } else {
      warningOrHint(errors, NO_RULE_DATE, IssueType.BUSINESSRULE, path, codeOk, notRetired, I18nConstants.VALUESET_INCLUDE_INVALID_CONCEPT_CODE, system, code, message);
    }
  }
}
private boolean validateValueSetIncludeConcept(List<ValidationMessage> errors, Element concept, NodeStack stackInc, NodeStack stack, String system, String version, CodeSystemChecker slv) {
String code = concept.getChildValue("code");