From e37edfcf8445fcfa36337ef0b651f36092ec865e Mon Sep 17 00:00:00 2001
From: Luke deGruchy <luke.degruchy@smilecdr.com>
Date: Wed, 7 Jun 2023 09:05:08 -0400
Subject: [PATCH] Ensure StepExecutor only logs a stack trace and error if the
 retries have been exhausted.  Otherwise, just log at info level. (#4961)

* Ensure StepExecutor only logs a stack trace and error if the retries have been exhausted.  Otherwise, just log at info level.

* Accept code reviewer suggestion to amend log message

Co-authored-by: michaelabuckley <michaelabuckley@gmail.com>

---------

Co-authored-by: michaelabuckley <michaelabuckley@gmail.com>
---
 ...0-delete-expunge-mssql-oracle-10000-resources-error.yaml | 6 ++++++
 .../java/ca/uhn/fhir/batch2/coordinator/StepExecutor.java   | 3 ++-
 2 files changed, 8 insertions(+), 1 deletion(-)
 create mode 100644 hapi-fhir-docs/src/main/resources/ca/uhn/hapi/fhir/changelog/6_8_0/4960-delete-expunge-mssql-oracle-10000-resources-error.yaml

diff --git a/hapi-fhir-docs/src/main/resources/ca/uhn/hapi/fhir/changelog/6_8_0/4960-delete-expunge-mssql-oracle-10000-resources-error.yaml b/hapi-fhir-docs/src/main/resources/ca/uhn/hapi/fhir/changelog/6_8_0/4960-delete-expunge-mssql-oracle-10000-resources-error.yaml
new file mode 100644
index 00000000000..4c1f6664bf5
--- /dev/null
+++ b/hapi-fhir-docs/src/main/resources/ca/uhn/hapi/fhir/changelog/6_8_0/4960-delete-expunge-mssql-oracle-10000-resources-error.yaml
@@ -0,0 +1,6 @@
+---
+type: fix
+issue: 4960
+jira: SMILE-5740
+title: "Previously, running a $delete-expunge on MSSQL or Oracle with over 10,000 resources resulted in an error and a stack trace, even though the job ended in status COMPLETE.
+        This has been fixed."
diff --git a/hapi-fhir-storage-batch2/src/main/java/ca/uhn/fhir/batch2/coordinator/StepExecutor.java b/hapi-fhir-storage-batch2/src/main/java/ca/uhn/fhir/batch2/coordinator/StepExecutor.java
index 585e361bab6..385d9b0cb79 100644
--- a/hapi-fhir-storage-batch2/src/main/java/ca/uhn/fhir/batch2/coordinator/StepExecutor.java
+++ b/hapi-fhir-storage-batch2/src/main/java/ca/uhn/fhir/batch2/coordinator/StepExecutor.java
@@ -70,10 +70,11 @@ public class StepExecutor {
 			return false;
 		} catch (Exception e) {
 			if (theStepExecutionDetails.hasAssociatedWorkChunk()) {
-				ourLog.error("Failure executing job {} step {}, marking chunk {} as ERRORED", jobDefinitionId, targetStepId, chunkId, e);
+				ourLog.info("Temporary problem executing job {} step {}, marking chunk {} as retriable ERRORED", jobDefinitionId, targetStepId, chunkId);
 				WorkChunkErrorEvent parameters = new WorkChunkErrorEvent(chunkId, e.getMessage());
 				WorkChunkStatusEnum newStatus = myJobPersistence.onWorkChunkError(parameters);
 				if (newStatus == WorkChunkStatusEnum.FAILED) {
+					ourLog.error("Exhausted retries:  Failure executing job {} step {}, marking chunk {} as ERRORED", jobDefinitionId, targetStepId, chunkId, e);
 					return false;
 				}
 			} else {