From 471073103f963e7fa1a7d8170b0dd862bed2c80a Mon Sep 17 00:00:00 2001 From: James Agnew Date: Tue, 28 Jan 2020 14:36:15 -0500 Subject: [PATCH] Further reduce memory usage in the terminology delta operations --- .../hapi/fhir/changelog/4_2_0/changes.yaml | 2 +- .../term/TermCodeSystemStorageSvcImpl.java | 115 +++--------------- .../jpa/term/TerminologySvcDeltaR4Test.java | 82 ++++++------- 3 files changed, 57 insertions(+), 142 deletions(-) diff --git a/hapi-fhir-docs/src/main/resources/ca/uhn/hapi/fhir/changelog/4_2_0/changes.yaml b/hapi-fhir-docs/src/main/resources/ca/uhn/hapi/fhir/changelog/4_2_0/changes.yaml index 51c674c14d9..5aff4b3cbf7 100644 --- a/hapi-fhir-docs/src/main/resources/ca/uhn/hapi/fhir/changelog/4_2_0/changes.yaml +++ b/hapi-fhir-docs/src/main/resources/ca/uhn/hapi/fhir/changelog/4_2_0/changes.yaml @@ -84,4 +84,4 @@ Sean McIlvenna for reporting!" Petro Mykhailysyn for the pull request!" - item: type: "fix" - title: "A meomery leak was resolved in the JPA terminology service delta upload operations." + title: "A memory leak was resolved in the JPA terminology service delta upload operations." diff --git a/hapi-fhir-jpaserver-base/src/main/java/ca/uhn/fhir/jpa/term/TermCodeSystemStorageSvcImpl.java b/hapi-fhir-jpaserver-base/src/main/java/ca/uhn/fhir/jpa/term/TermCodeSystemStorageSvcImpl.java index a4abf99c328..43648e3e99c 100644 --- a/hapi-fhir-jpaserver-base/src/main/java/ca/uhn/fhir/jpa/term/TermCodeSystemStorageSvcImpl.java +++ b/hapi-fhir-jpaserver-base/src/main/java/ca/uhn/fhir/jpa/term/TermCodeSystemStorageSvcImpl.java @@ -164,86 +164,12 @@ public class TermCodeSystemStorageSvcImpl implements ITermCodeSystemStorageSvc { IIdType codeSystemId = cs.getResource().getIdDt(); - // Load all concepts for the code system - Map codeToConceptPid = new HashMap<>(); - { - ourLog.info("Loading all concepts in CodeSystem versionPid[{}] and url[{}]", cs.getPid(), theSystem); - StopWatch sw = new StopWatch(); - CriteriaBuilder criteriaBuilder = myEntityManager.getCriteriaBuilder(); - CriteriaQuery query = criteriaBuilder.createQuery(TermConcept.class); - Root root = query.from(TermConcept.class); - Predicate predicate = criteriaBuilder.equal(root.get("myCodeSystemVersionPid").as(Long.class), csv.getPid()); - query.where(predicate); - TypedQuery typedQuery = myEntityManager.createQuery(query.select(root)); - org.hibernate.query.Query hibernateQuery = (org.hibernate.query.Query) typedQuery; - ScrollableResults scrollableResults = hibernateQuery.scroll(ScrollMode.FORWARD_ONLY); - int count = 0; - try (ScrollableResultsIterator scrollableResultsIterator = new ScrollableResultsIterator<>(scrollableResults)) { - while (scrollableResultsIterator.hasNext()) { - TermConcept next = scrollableResultsIterator.next(); - codeToConceptPid.put(next.getCode(), next.getId()); - - // We don't want to keep the loaded entities in the L1 cache because they can take up a loooot of memory - if (count % 100 == 0) { - myEntityManager.clear(); - } - count++; - } - } - ourLog.info("Loaded {} concepts in {}", codeToConceptPid.size(), sw.toString()); - } - - // Load all parent/child links - ListMultimap parentCodeToChildCodes = ArrayListMultimap.create(); - ListMultimap childCodeToParentCodes = ArrayListMultimap.create(); - { - ourLog.info("Loading all parent/child relationships in CodeSystem url[" + theSystem + "]"); - int count = 0; - StopWatch sw = new StopWatch(); - CriteriaBuilder criteriaBuilder = myEntityManager.getCriteriaBuilder(); - CriteriaQuery query = criteriaBuilder.createQuery(TermConceptParentChildLink.class); - Root root = query.from(TermConceptParentChildLink.class); - Predicate predicate = criteriaBuilder.equal(root.get("myCodeSystemVersionPid").as(Long.class), csv.getPid()); - root.fetch("myChild"); - root.fetch("myParent"); - query.where(predicate); - TypedQuery typedQuery = myEntityManager.createQuery(query.select(root)); - org.hibernate.query.Query hibernateQuery = (org.hibernate.query.Query) typedQuery; - ScrollableResults scrollableResults = hibernateQuery.scroll(ScrollMode.FORWARD_ONLY); - try (ScrollableResultsIterator scrollableResultsIterator = new ScrollableResultsIterator<>(scrollableResults)) { - while (scrollableResultsIterator.hasNext()) { - TermConceptParentChildLink next = scrollableResultsIterator.next(); - String parentCode = next.getParent().getCode(); - String childCode = next.getChild().getCode(); - parentCodeToChildCodes.put(parentCode, childCode); - childCodeToParentCodes.put(childCode, parentCode); - - // We don't want to keep the loaded entities in the L1 cache because they can take up a loooot of memory - if (count % 100 == 0) { - myEntityManager.clear(); - } - - count++; - } - } - ourLog.info("Loaded {} parent/child relationships in {}", count, sw.toString()); - } - - ourLog.trace("Starting delta application"); - - // Account for root codes in the parent->child map - for (String nextCode : codeToConceptPid.keySet()) { - if (childCodeToParentCodes.get(nextCode).isEmpty()) { - parentCodeToChildCodes.put("", nextCode); - } - } - UploadStatistics retVal = new UploadStatistics(codeSystemId); // Add root concepts for (TermConcept nextRootConcept : theAdditions.getRootConcepts()) { List parentCodes = Collections.emptyList(); - addConcept(csv, codeToConceptPid, parentCodes, nextRootConcept, parentCodeToChildCodes, retVal, theAdditions.getRootConceptCodes(), true); + addConcept(csv, parentCodes, nextRootConcept, retVal, true, 0); } return retVal; @@ -519,48 +445,37 @@ public class TermCodeSystemStorageSvcImpl implements ITermCodeSystemStorageSvc { Validate.isTrue(myContext.getVersion().getVersion().isEqualOrNewerThan(FhirVersionEnum.DSTU3), "Terminology operations only supported in DSTU3+ mode"); } - private void addConcept(TermCodeSystemVersion theCsv, Map theCodeToConceptPid, Collection theParentCodes, TermConcept theConceptToAdd, ListMultimap theParentCodeToChildCodes, UploadStatistics theStatisticsTracker, Set theAdditionSetRootConceptCodes, boolean theRootConcept) { + private void addConcept(TermCodeSystemVersion theCsv, Collection theParentCodes, TermConcept theConceptToAdd, UploadStatistics theStatisticsTracker, boolean theRootConcept, int theSequence) { TermConcept conceptToAdd = theConceptToAdd; List childrenToAdd = theConceptToAdd.getChildren(); String nextCodeToAdd = conceptToAdd.getCode(); String parentDescription = "(root concept)"; Set parentConcepts = new HashSet<>(); + if (!theParentCodes.isEmpty()) { parentDescription = "[" + String.join(", ", theParentCodes) + "]"; for (String nextParentCode : theParentCodes) { - Long nextParentCodePid = theCodeToConceptPid.get(nextParentCode); - if (nextParentCodePid == null) { + Optional nextParentOpt = myConceptDao.findByCodeSystemAndCode(theCsv, nextParentCode); + if (nextParentOpt.isPresent() == false) { throw new InvalidRequestException("Unable to add code \"" + nextCodeToAdd + "\" to unknown parent: " + nextParentCode); } - parentConcepts.add(myConceptDao.getOne(nextParentCodePid)); + parentConcepts.add(nextParentOpt.get()); } } ourLog.info("Saving concept {} with parent {}", theStatisticsTracker.getUpdatedConceptCount(), parentDescription); - if (theCodeToConceptPid.containsKey(nextCodeToAdd)) { - - TermConcept existingCode = myConceptDao.getOne(theCodeToConceptPid.get(nextCodeToAdd)); + Optional existingCodeOpt = myConceptDao.findByCodeSystemAndCode(theCsv, nextCodeToAdd); + if (existingCodeOpt.isPresent()) { + TermConcept existingCode = existingCodeOpt.get(); existingCode.setIndexStatus(null); existingCode.setDisplay(conceptToAdd.getDisplay()); conceptToAdd = existingCode; - } - if (conceptToAdd.getSequence() == null || !theRootConcept) { - // If this is a new code, give it a sequence number based on how many concepts the - // parent already has (or the highest number, if the code has multiple parents) - int sequence = 0; - for (String nextParentCode : theParentCodes) { - theParentCodeToChildCodes.put(nextParentCode, nextCodeToAdd); - sequence = Math.max(sequence, theParentCodeToChildCodes.get(nextParentCode).size()); - } - if (theParentCodes.isEmpty()) { - theParentCodeToChildCodes.put("", nextCodeToAdd); - sequence = Math.max(sequence, theParentCodeToChildCodes.get("").size()); - } - conceptToAdd.setSequence(sequence); + if (conceptToAdd.getSequence() == null) { + conceptToAdd.setSequence(theSequence); } // Drop any old parent-child links if they aren't explicitly specified in the @@ -586,7 +501,6 @@ public class TermCodeSystemStorageSvcImpl implements ITermCodeSystemStorageSvc { Long nextConceptPid = conceptToAdd.getId(); Validate.notNull(nextConceptPid); - theCodeToConceptPid.put(nextCodeToAdd, nextConceptPid); theStatisticsTracker.incrementUpdatedConceptCount(); // Add link to new child to the parent @@ -605,6 +519,7 @@ public class TermCodeSystemStorageSvcImpl implements ITermCodeSystemStorageSvc { ourLog.trace("About to save parent-child links"); // Save children recursively + int childIndex = 0; for (TermConceptParentChildLink nextChildConceptLink : new ArrayList<>(childrenToAdd)) { TermConcept nextChild = nextChildConceptLink.getChild(); @@ -612,15 +527,15 @@ public class TermCodeSystemStorageSvcImpl implements ITermCodeSystemStorageSvc { for (int i = 0; i < nextChild.getParents().size(); i++) { if (nextChild.getParents().get(i).getId() == null) { String parentCode = nextChild.getParents().get(i).getParent().getCode(); - Long parentPid = theCodeToConceptPid.get(parentCode); - TermConcept parentConcept = myConceptDao.findById(parentPid).orElseThrow(() -> new IllegalArgumentException("Unknown parent code: " + parentCode)); + TermConcept parentConcept = myConceptDao.findByCodeSystemAndCode(theCsv, parentCode).orElseThrow(() -> new IllegalArgumentException("Unknown parent code: " + parentCode)); nextChild.getParents().get(i).setParent(parentConcept); } } Collection parentCodes = nextChild.getParents().stream().map(t -> t.getParent().getCode()).collect(Collectors.toList()); - addConcept(theCsv, theCodeToConceptPid, parentCodes, nextChild, theParentCodeToChildCodes, theStatisticsTracker, theAdditionSetRootConceptCodes, false); + addConcept(theCsv, parentCodes, nextChild, theStatisticsTracker, false, childIndex); + childIndex++; } } diff --git a/hapi-fhir-jpaserver-base/src/test/java/ca/uhn/fhir/jpa/term/TerminologySvcDeltaR4Test.java b/hapi-fhir-jpaserver-base/src/test/java/ca/uhn/fhir/jpa/term/TerminologySvcDeltaR4Test.java index ffb704b7a68..f663e7357b5 100644 --- a/hapi-fhir-jpaserver-base/src/test/java/ca/uhn/fhir/jpa/term/TerminologySvcDeltaR4Test.java +++ b/hapi-fhir-jpaserver-base/src/test/java/ca/uhn/fhir/jpa/term/TerminologySvcDeltaR4Test.java @@ -47,8 +47,8 @@ public class TerminologySvcDeltaR4Test extends BaseJpaR4Test { delta.addRootConcept("RootB", "Root B"); myTermCodeSystemStorageSvc.applyDeltaCodeSystemsAdd("http://foo/cs", delta); assertHierarchyContains( - "RootA seq=1", - "RootB seq=2" + "RootA seq=0", + "RootB seq=0" ); delta = new CustomTerminologySet(); @@ -56,10 +56,10 @@ public class TerminologySvcDeltaR4Test extends BaseJpaR4Test { delta.addRootConcept("RootD", "Root D"); myTermCodeSystemStorageSvc.applyDeltaCodeSystemsAdd("http://foo/cs", delta); assertHierarchyContains( - "RootA seq=1", - "RootB seq=2", - "RootC seq=3", - "RootD seq=4" + "RootA seq=0", + "RootB seq=0", + "RootC seq=0", + "RootD seq=0" ); } @@ -103,8 +103,8 @@ public class TerminologySvcDeltaR4Test extends BaseJpaR4Test { delta.addRootConcept("RootB", "Root B"); myTermCodeSystemStorageSvc.applyDeltaCodeSystemsAdd("http://foo/cs", delta); assertHierarchyContains( - "RootA seq=1", - "RootB seq=2" + "RootA seq=0", + "RootB seq=0" ); ourLog.info("Have performed add"); @@ -120,10 +120,10 @@ public class TerminologySvcDeltaR4Test extends BaseJpaR4Test { root.addChild(TermConceptParentChildLink.RelationshipTypeEnum.ISA).setCode("ChildAB").setDisplay("Child AB"); myTermCodeSystemStorageSvc.applyDeltaCodeSystemsAdd("http://foo/cs", delta); assertHierarchyContains( - "RootA seq=1", - " ChildAA seq=1", - " ChildAB seq=2", - "RootB seq=2" + "RootA seq=0", + " ChildAA seq=0", + " ChildAB seq=1", + "RootB seq=0" ); } @@ -142,10 +142,10 @@ public class TerminologySvcDeltaR4Test extends BaseJpaR4Test { delta.addRootConcept("RootB", "Root B"); outcome = myTermCodeSystemStorageSvc.applyDeltaCodeSystemsAdd("http://foo/cs", delta); assertHierarchyContains( - "RootA seq=1", - " ChildAA seq=1", - " ChildAAA seq=1", - "RootB seq=2" + "RootA seq=0", + " ChildAA seq=0", + " ChildAAA seq=0", + "RootB seq=0" ); assertEquals(4, outcome.getUpdatedConceptCount()); @@ -154,10 +154,10 @@ public class TerminologySvcDeltaR4Test extends BaseJpaR4Test { .addChild(TermConceptParentChildLink.RelationshipTypeEnum.ISA).setCode("ChildAA").setDisplay("Child AA"); outcome = myTermCodeSystemStorageSvc.applyDeltaCodeSystemsAdd("http://foo/cs", delta); assertHierarchyContains( - "RootA seq=1", - "RootB seq=2", - " ChildAA seq=1", - " ChildAAA seq=1" + "RootA seq=0", + "RootB seq=0", + " ChildAA seq=0", + " ChildAAA seq=0" ); assertEquals(2, outcome.getUpdatedConceptCount()); @@ -199,8 +199,8 @@ public class TerminologySvcDeltaR4Test extends BaseJpaR4Test { // Check so far assertHierarchyContains( - "ParentA seq=1", - " ChildA seq=1" + "ParentA seq=0", + " ChildA seq=0" ); // Add sub-child to existing child @@ -212,9 +212,9 @@ public class TerminologySvcDeltaR4Test extends BaseJpaR4Test { // Check so far assertHierarchyContains( - "ParentA seq=1", - " ChildA seq=1", - " ChildAA seq=1" + "ParentA seq=0", + " ChildA seq=0", + " ChildAA seq=0" ); } @@ -285,14 +285,14 @@ public class TerminologySvcDeltaR4Test extends BaseJpaR4Test { UploadStatistics outcome = myTermCodeSystemStorageSvc.applyDeltaCodeSystemsAdd("http://foo/cs", delta); assertEquals(8, outcome.getUpdatedConceptCount()); assertHierarchyContains( - "CodeA seq=1", - " CodeAA seq=1", - " CodeAAA seq=1", - " CodeAAB seq=2", - "CodeB seq=2", - " CodeBA seq=1", - " CodeBAA seq=1", - " CodeBAB seq=2" + "CodeA seq=0", + " CodeAA seq=0", + " CodeAAA seq=0", + " CodeAAB seq=1", + "CodeB seq=0", + " CodeBA seq=0", + " CodeBAA seq=0", + " CodeBAB seq=1" ); // Move a single child code to a new spot and make sure the hierarchy comes along @@ -304,14 +304,14 @@ public class TerminologySvcDeltaR4Test extends BaseJpaR4Test { outcome = myTermCodeSystemStorageSvc.applyDeltaCodeSystemsAdd("http://foo/cs", delta); assertEquals(2, outcome.getUpdatedConceptCount()); assertHierarchyContains( - "CodeA seq=1", - "CodeB seq=2", - " CodeBA seq=1", - " CodeBAA seq=1", - " CodeBAB seq=2", - " CodeAA seq=2", // <-- CodeAA got added here so it comes second - " CodeAAA seq=1", - " CodeAAB seq=2" + "CodeA seq=0", + "CodeB seq=0", + " CodeBA seq=0", + " CodeBAA seq=0", + " CodeBAB seq=1", + " CodeAA seq=0", // <-- CodeAA got added here so it comes second + " CodeAAA seq=0", + " CodeAAB seq=1" ); }