discourse/spec/lib/turbo_tests/flaky/manager_spec.rb

# frozen_string_literal: true

RSpec.describe TurboTests::Flaky::Manager do
  fab!(:rspec_example_1) do # Failed RSpec example whose error message embeds a screenshot path.
    RSpec::Core::Example
      .describe
      .example("rspec example 1") # NOTE(review): specs below expect location "...manager_spec.rb:7", presumably this call's line — do not add/remove lines above it.
      .tap do |example|
        example.execution_result.status = :failed # force the "failed" outcome without running anything
        example.execution_result.exception =
          StandardError
            .new(
              "some error\n\n#{TurboTests::Flaky::FailedExample::SCREENSHOT_PREFIX}/some/path/to/screenshot.png",
            )
            .tap { |exception| exception.set_backtrace(["some backtrace"]) } # fixed backtrace, matched verbatim by the specs below
      end
  end

  fab!(:rspec_example_2) do # Failed RSpec example with a plain error message (no screenshot path).
    RSpec::Core::Example
      .describe
      .example("rspec example 2") # NOTE(review): specs below expect location "...manager_spec.rb:22", presumably this call's line — do not add/remove lines above it.
      .tap do |example|
        example.execution_result.status = :failed # force the "failed" outcome without running anything
        example.execution_result.exception =
          StandardError
            .new("some error")
            .tap { |exception| exception.set_backtrace(["some backtrace"]) } # fixed backtrace, matched verbatim by the specs below
      end
  end

  # Fake example built from the JSON form of `rspec_example_1`, tagged with
  # process id 1 and its own command string.
  fab!(:fake_example_1) do
    serialized = TurboTests::JsonExample.new(rspec_example_1).to_json

    TurboTests::FakeExample.from_obj(
      serialized,
      command_string: "some command string",
      process_id: 1,
    )
  end

  # Fake example built from the JSON form of `rspec_example_2`, tagged with
  # process id 2 and its own command string.
  fab!(:fake_example_2) do
    serialized = TurboTests::JsonExample.new(rspec_example_2).to_json

    TurboTests::FakeExample.from_obj(
      serialized,
      command_string: "some other command string",
      process_id: 2,
    )
  end

  # Yields to the given block through `stub_const`, with
  # `TurboTests::Flaky::Manager::PATH` replaced by the path of a fresh
  # temporary file, and always cleans that file up afterwards.
  def with_fake_path
    tmp_file = Tempfile.new

    stub_const(TurboTests::Flaky::Manager, "PATH", tmp_file.path) { yield }
  ensure
    # `close!` releases the file descriptor in addition to unlinking the file
    # (a bare `delete` only unlinks). The safe navigation keeps a failure inside
    # `Tempfile.new` from being masked by a NoMethodError on nil.
    tmp_file&.close!
  end

  describe ".potential_flaky_tests" do
    # Logs both fake failures and expects their rerun locations back, in order.
    it "should return the failed examples' `location_rerun_argument`" do
      with_fake_path do
        failures = [fake_example_1, fake_example_2]
        described_class.log_potential_flaky_tests(failures)

        expected_locations = [
          "./spec/lib/turbo_tests/flaky/manager_spec.rb:7",
          "./spec/lib/turbo_tests/flaky/manager_spec.rb:22",
        ]

        expect(described_class.potential_flaky_tests).to eq(expected_locations)
      end
    end
  end

  describe ".log_potential_flaky_tests" do
    # Logs both fake failures and compares the JSON written to the stubbed
    # PATH against the full expected record for each example.
    it "should log the failed examples correctly" do
      with_fake_path do
        described_class.log_potential_flaky_tests([fake_example_1, fake_example_2])

        logged_entries = JSON.parse(File.read(described_class::PATH))

        # Record for the example whose message carries a screenshot path.
        with_screenshot = {
          "message_lines" =>
            "Failure/Error: Unable to infer file and line number from backtrace\n\nStandardError:\n  some error\n\n  [Screenshot Image]: /some/path/to/screenshot.png",
          "description" => "rspec example 1",
          "exception_message" =>
            "some error\n\n[Screenshot Image]: /some/path/to/screenshot.png",
          "exception_name" => "StandardError",
          "backtrace" => ["some backtrace"],
          "failure_screenshot_path" => "/some/path/to/screenshot.png",
          "location_rerun_argument" => "./spec/lib/turbo_tests/flaky/manager_spec.rb:7",
          "rerun_command" => "some command string",
        }

        # Record for the example with a plain message and no screenshot.
        without_screenshot = {
          "message_lines" =>
            "Failure/Error: Unable to infer file and line number from backtrace\n\nStandardError:\n  some error",
          "description" => "rspec example 2",
          "exception_message" => "some error",
          "exception_name" => "StandardError",
          "backtrace" => ["some backtrace"],
          "failure_screenshot_path" => nil,
          "location_rerun_argument" => "./spec/lib/turbo_tests/flaky/manager_spec.rb:22",
          "rerun_command" => "some other command string",
        }

        expect(logged_entries).to eq([with_screenshot, without_screenshot])
      end
    end
  end

  describe ".remove_example" do
    # Seeds the log with both fake failures, removes only the first, and checks
    # that exactly the second one is left in the file.
    it "should remove the example from the log file" do
      with_fake_path do
        TurboTests::Flaky::Manager.log_potential_flaky_tests([fake_example_1, fake_example_2])
        TurboTests::Flaky::Manager.remove_example([rspec_example_1])

        parsed_json = JSON.parse(File.read(TurboTests::Flaky::Manager::PATH))

        # Comparing the whole list checks both the count and the surviving
        # entry, and shows every remaining description on failure.
        remaining_descriptions = parsed_json.map { |entry| entry["description"] }

        expect(remaining_descriptions).to eq(["rspec example 2"])
      end
    end

    # Removing every logged example should leave no file behind at PATH.
    it "should delete the log file if there are no more examples" do
      with_fake_path do
        TurboTests::Flaky::Manager.log_potential_flaky_tests([fake_example_1, fake_example_2])
        TurboTests::Flaky::Manager.remove_example([rspec_example_1, rspec_example_2])

        expect(File.exist?(TurboTests::Flaky::Manager::PATH)).to eq(false)
      end
    end
  end
end
DEV: Introduce automatic reruns to RSpec tests on GitHub Actions (#24811) What motivated this change? Our builds on GitHub Actions have been extremely flaky mostly due to system tests. This has led to a drop in confidence in our test suite where our developers tend to assume that a failed job is due to a flaky system test. As a result, we have had occurrences where changes that resulted in legitimate test failures were merged into the `main` branch because developers assumed it was a flaky test. What does this change do? This change seeks to reduce the flakiness of our builds on GitHub Actions by automatically re-running RSpec tests once when they fail. If a failed test passes subsequently in the re-run, we mark the test as flaky by logging it into a file on disk which is then uploaded as an artifact of the GitHub workflow run. We understand that automatic re-runs will lead to lower accuracy of our tests but we consider this an acceptable trade-off since a fragile build has a much greater impact on our developers' time. Internally, the Discourse development team will be running a service to fetch the flaky tests which have been logged for internal monitoring. How is the change implemented? 1. A `--retry-and-log-flaky-tests` CLI flag is added to the `bin/turbo_rspec` CLI which will then initialize `TurboTests::Runner` with the `retry_and_log_flaky_tests` kwarg set to `true`. 2. When the `retry_and_log_flaky_tests` kwarg is set to `true` for `TurboTests::Runner`, we will register an additional formatter `Flaky::FailuresLoggerFormatter` to the `TurboTests::Reporter` in the `TurboTests::Runner#run` method. The `Flaky::FailuresLoggerFormatter` has a simple job of logging all failed examples to a file on disk when running all the tests. The details of the failed example which are logged can be found in `TurboTests::Flaky::FailedExample.to_h`. 3. 
Once all the tests have been run once, we check the result for any failed examples and if there are, we read the file on disk to fetch the `location_rerun_argument` of the failed examples which is then used to run the tests in a new RSpec process. In the rerun, we configure a `TurboTests::Flaky::FlakyDetectorFormatter` with RSpec which removes all failed examples from the log file on disk since those examples are not flaky tests. Note that if there are too many failed examples on the first run, we will deem the failures to likely not be due to flaky tests and not re-run the test failures. As of writing, the threshold of failed examples is set to 10. If there are more than 10 failed examples, we will not re-run the failures. 2023-12-12 18:18:27 -05:00			`# frozen_string_literal: true`

			`RSpec.describe TurboTests::Flaky::Manager do`
			`fab!(:rspec_example_1) do`
			`RSpec::Core::Example`
			`.describe`
			`.example("rspec example 1")`
			`.tap do \|example\|`
			`example.execution_result.status = :failed`
			`example.execution_result.exception =`
			`StandardError`
			`.new(`
			`"some error\n\n#{TurboTests::Flaky::FailedExample::SCREENSHOT_PREFIX}/some/path/to/screenshot.png",`
			`)`
			`.tap { \|exception\| exception.set_backtrace(["some backtrace"]) }`
			`end`
			`end`

			`fab!(:rspec_example_2) do`
			`RSpec::Core::Example`
			`.describe`
			`.example("rspec example 2")`
			`.tap do \|example\|`
			`example.execution_result.status = :failed`
			`example.execution_result.exception =`
			`StandardError`
			`.new("some error")`
			`.tap { \|exception\| exception.set_backtrace(["some backtrace"]) }`
			`end`
			`end`

			`fab!(:fake_example_1) do`
			`TurboTests::FakeExample.from_obj(`
			`TurboTests::JsonExample.new(rspec_example_1).to_json,`
			`process_id: 1,`
			`command_string: "some command string",`
			`)`
			`end`

			`fab!(:fake_example_2) do`
			`TurboTests::FakeExample.from_obj(`
			`TurboTests::JsonExample.new(rspec_example_2).to_json,`
			`process_id: 2,`
			`command_string: "some other command string",`
			`)`
			`end`

			`def with_fake_path`
			`tmp_file = Tempfile.new`

			`stub_const(TurboTests::Flaky::Manager, "PATH", tmp_file.path) { yield }`
			`ensure`
			`tmp_file.delete`
			`end`

			`describe ".potential_flaky_tests" do`
			it "should return the failed examples' `location_rerun_argument`" do
			`with_fake_path do`
			`TurboTests::Flaky::Manager.log_potential_flaky_tests([fake_example_1, fake_example_2])`

			`expect(TurboTests::Flaky::Manager.potential_flaky_tests).to eq(`
			`%w[`
			`./spec/lib/turbo_tests/flaky/manager_spec.rb:7`
			`./spec/lib/turbo_tests/flaky/manager_spec.rb:22`
			`],`
			`)`
			`end`
			`end`
			`end`

			`describe ".log_potential_flaky_tests" do`
			`it "should log the failed examples correctly" do`
			`with_fake_path do`
			`TurboTests::Flaky::Manager.log_potential_flaky_tests([fake_example_1, fake_example_2])`

			`expect(JSON.parse(File.read(TurboTests::Flaky::Manager::PATH))).to eq(`
			`[`
			`{`
			`"message_lines" =>`
			`"Failure/Error: Unable to infer file and line number from backtrace\n\nStandardError:\n some error\n\n [Screenshot Image]: /some/path/to/screenshot.png",`
			`"description" => "rspec example 1",`
DEV: Include exception details for each test in flaky tests report (#24892) Why this change? The exception message and name are useful when analyzing why a test failed. 2023-12-13 22:11:11 -05:00			`"exception_message" =>`
			`"some error\n\n[Screenshot Image]: /some/path/to/screenshot.png",`
			`"exception_name" => "StandardError",`
DEV: Introduce automatic reruns to RSpec tests on GitHub Actions (#24811) What motivated this change? Our builds on GitHub Actions have been extremely flaky mostly due to system tests. This has led to a drop in confidence in our test suite where our developers tend to assume that a failed job is due to a flaky system test. As a result, we have had occurrences where changes that resulted in legitimate test failures were merged into the `main` branch because developers assumed it was a flaky test. What does this change do? This change seeks to reduce the flakiness of our builds on GitHub Actions by automatically re-running RSpec tests once when they fail. If a failed test passes subsequently in the re-run, we mark the test as flaky by logging it into a file on disk which is then uploaded as an artifact of the GitHub workflow run. We understand that automatic re-runs will lead to lower accuracy of our tests but we consider this an acceptable trade-off since a fragile build has a much greater impact on our developers' time. Internally, the Discourse development team will be running a service to fetch the flaky tests which have been logged for internal monitoring. How is the change implemented? 1. A `--retry-and-log-flaky-tests` CLI flag is added to the `bin/turbo_rspec` CLI which will then initialize `TurboTests::Runner` with the `retry_and_log_flaky_tests` kwarg set to `true`. 2. When the `retry_and_log_flaky_tests` kwarg is set to `true` for `TurboTests::Runner`, we will register an additional formatter `Flaky::FailuresLoggerFormatter` to the `TurboTests::Reporter` in the `TurboTests::Runner#run` method. The `Flaky::FailuresLoggerFormatter` has a simple job of logging all failed examples to a file on disk when running all the tests. The details of the failed example which are logged can be found in `TurboTests::Flaky::FailedExample.to_h`. 3. 
Once all the tests have been run once, we check the result for any failed examples and if there are, we read the file on disk to fetch the `location_rerun_argument` of the failed examples which is then used to run the tests in a new RSpec process. In the rerun, we configure a `TurboTests::Flaky::FlakyDetectorFormatter` with RSpec which removes all failed examples from the log file on disk since those examples are not flaky tests. Note that if there are too many failed examples on the first run, we will deem the failures to likely not be due to flaky tests and not re-run the test failures. As of writing, the threshold of failed examples is set to 10. If there are more than 10 failed examples, we will not re-run the failures. 2023-12-12 18:18:27 -05:00			`"backtrace" => ["some backtrace"],`
			`"failure_screenshot_path" => "/some/path/to/screenshot.png",`
			`"location_rerun_argument" => "./spec/lib/turbo_tests/flaky/manager_spec.rb:7",`
			`"rerun_command" => "some command string",`
			`},`
			`{`
			`"message_lines" =>`
			`"Failure/Error: Unable to infer file and line number from backtrace\n\nStandardError:\n some error",`
			`"description" => "rspec example 2",`
DEV: Include exception details for each test in flaky tests report (#24892) Why this change? The exception message and name are useful when analyzing why a test failed. 2023-12-13 22:11:11 -05:00			`"exception_message" => "some error",`
			`"exception_name" => "StandardError",`
DEV: Introduce automatic reruns to RSpec tests on GitHub Actions (#24811) What motivated this change? Our builds on GitHub Actions have been extremely flaky mostly due to system tests. This has led to a drop in confidence in our test suite where our developers tend to assume that a failed job is due to a flaky system test. As a result, we have had occurrences where changes that resulted in legitimate test failures were merged into the `main` branch because developers assumed it was a flaky test. What does this change do? This change seeks to reduce the flakiness of our builds on GitHub Actions by automatically re-running RSpec tests once when they fail. If a failed test passes subsequently in the re-run, we mark the test as flaky by logging it into a file on disk which is then uploaded as an artifact of the GitHub workflow run. We understand that automatic re-runs will lead to lower accuracy of our tests but we consider this an acceptable trade-off since a fragile build has a much greater impact on our developers' time. Internally, the Discourse development team will be running a service to fetch the flaky tests which have been logged for internal monitoring. How is the change implemented? 1. A `--retry-and-log-flaky-tests` CLI flag is added to the `bin/turbo_rspec` CLI which will then initialize `TurboTests::Runner` with the `retry_and_log_flaky_tests` kwarg set to `true`. 2. When the `retry_and_log_flaky_tests` kwarg is set to `true` for `TurboTests::Runner`, we will register an additional formatter `Flaky::FailuresLoggerFormatter` to the `TurboTests::Reporter` in the `TurboTests::Runner#run` method. The `Flaky::FailuresLoggerFormatter` has a simple job of logging all failed examples to a file on disk when running all the tests. The details of the failed example which are logged can be found in `TurboTests::Flaky::FailedExample.to_h`. 3. 
Once all the tests have been run once, we check the result for any failed examples and if there are, we read the file on disk to fetch the `location_rerun_argument` of the failed examples which is then used to run the tests in a new RSpec process. In the rerun, we configure a `TurboTests::Flaky::FlakyDetectorFormatter` with RSpec which removes all failed examples from the log file on disk since those examples are not flaky tests. Note that if there are too many failed examples on the first run, we will deem the failures to likely not be due to flaky tests and not re-run the test failures. As of writing, the threshold of failed examples is set to 10. If there are more than 10 failed examples, we will not re-run the failures. 2023-12-12 18:18:27 -05:00			`"backtrace" => ["some backtrace"],`
			`"failure_screenshot_path" => nil,`
			`"location_rerun_argument" => "./spec/lib/turbo_tests/flaky/manager_spec.rb:22",`
			`"rerun_command" => "some other command string",`
			`},`
			`],`
			`)`
			`end`
			`end`
			`end`

			`describe ".remove_example" do`
			`it "should remove the from the log file" do`
			`with_fake_path do`
			`TurboTests::Flaky::Manager.log_potential_flaky_tests([fake_example_1, fake_example_2])`
			`TurboTests::Flaky::Manager.remove_example([rspec_example_1])`

			`parsed_json = JSON.parse(File.read(TurboTests::Flaky::Manager::PATH))`

			`expect(parsed_json.size).to eq(1)`
			`expect(parsed_json.first["description"]).to eq("rspec example 2")`
			`end`
			`end`

			`it "should delete the log file if there are no more examples" do`
			`with_fake_path do`
			`TurboTests::Flaky::Manager.log_potential_flaky_tests([fake_example_1, fake_example_2])`
			`TurboTests::Flaky::Manager.remove_example([rspec_example_1, rspec_example_2])`

			`expect(File.exist?(TurboTests::Flaky::Manager::PATH)).to eq(false)`
			`end`
			`end`
			`end`
			`end`