From c077daaade8a7357efff3a61126b5043d9ff38c0 Mon Sep 17 00:00:00 2001
From: Abhishek Radhakrishnan
Date: Thu, 12 Sep 2024 09:06:35 -0400
Subject: [PATCH] GHA steps to collect and upload heap dumps to debug UT OOM errors (#17029)

* Add GHA steps to tar and upload any heap dumps on failure to debug UT OOM issues.

* Add jvm options to heap dump OnOutOfMemoryError

Co-authored-by: Elliott Freis <108356317+imply-elliott@users.noreply.github.com>

---------

Co-authored-by: Elliott Freis <108356317+imply-elliott@users.noreply.github.com>
---
Review notes (this section is not part of the commit message):
  * The line structure of this patch was restored from a whitespace-collapsed
    copy. Indentation of the workflow lines was reconstructed from the YAML
    nesting (steps of a job under "jobs:") -- verify it against the target
    file before applying.
  * actions/upload-artifact is pinned to the v4 release tag rather than the
    mutable "master" branch, so the step cannot change behavior silently.
  * Shell path expansions in the run scripts are quoted; the globs stay
    outside the quotes so they still expand.
  * The commit id on the first line and the blob ids on the "index" line
    describe the change as originally committed, before the two content
    edits above.

 .github/workflows/reusable-unit-tests.yml | 22 ++++++++++++++++++++++
 pom.xml                                   |  2 ++
 2 files changed, 24 insertions(+)

diff --git a/.github/workflows/reusable-unit-tests.yml b/.github/workflows/reusable-unit-tests.yml
index 291cb648b3e..3634da0e200 100644
--- a/.github/workflows/reusable-unit-tests.yml
+++ b/.github/workflows/reusable-unit-tests.yml
@@ -117,6 +117,28 @@ jobs:
           MAVEN_PROJECTS: ${{ inputs.maven_projects }}
         run: ./.github/scripts/unit_tests_script.sh

+      - name: Check for .hprof files on failure
+        if: ${{ failure() }}
+        id: check_for_heap_dump
+        run: |
+          if ls "${GITHUB_WORKSPACE}"/target/*.hprof 1> /dev/null 2>&1; then
+            echo "found_hprof=true" >> "$GITHUB_ENV"
+          else
+            echo "found_hprof=false" >> "$GITHUB_ENV"
+          fi
+
+      - name: Collect tarball hprof dumps if they exist on failure
+        if: ${{ failure() && env.found_hprof == 'true' }}
+        run: |
+          tar cvzf "${RUNNER_TEMP}/hprof-dumps.tgz" "${GITHUB_WORKSPACE}"/target/*.hprof
+
+      - name: Upload hprof dumps to GitHub if they exist on failure
+        if: ${{ failure() && env.found_hprof == 'true' }}
+        uses: actions/upload-artifact@v4
+        with:
+          name: Hprof-${{ inputs.group }} hprof dumps (Compile=jdk${{ inputs.build_jdk }}, Run=jdk${{ inputs.runtime_jdk }})
+          path: ${{ runner.temp }}/hprof-dumps.tgz
+
       - name: set outputs on failure
         id: set_outputs
         if: ${{ failure() }}
diff --git a/pom.xml b/pom.xml
index 10c3452d5f1..47a6bd5cef3 100644
--- a/pom.xml
+++ b/pom.xml
@@ -1778,6 +1778,8 @@
-Djava.util.logging.manager=org.apache.logging.log4j.jul.LogManager -Daws.region=us-east-1 -Ddruid.test.stupidPool.poison=true + -XX:OnOutOfMemoryError='chmod 644 ${project.parent.basedir}/target/*.hprof' + -XX:HeapDumpPath=${project.parent.basedir}/target -Ddruid.indexing.doubleStorage=double ${jfrProfilerArgLine}