From 3b0cfdbe5c4f3acda704284b7a6f577762e4a595 Mon Sep 17 00:00:00 2001 From: Sam Date: Thu, 11 Apr 2024 13:26:58 +1000 Subject: [PATCH] FIX: throwing away first file in diff (#573) We were chucking out the first file in a PR diff due to a logic bug --- lib/ai_bot/tools/github_pull_request_diff.rb | 11 ++---- lib/completions/dialects/llama2_classic.rb | 2 +- .../dialects/llama2_classic_spec.rb | 4 +- spec/lib/completions/dialects/mixtral_spec.rb | 4 +- .../completions/dialects/orca_style_spec.rb | 4 +- .../tools/github_pull_request_diff_spec.rb | 37 +++++++++++++++++++ 6 files changed, 48 insertions(+), 14 deletions(-) diff --git a/lib/ai_bot/tools/github_pull_request_diff.rb b/lib/ai_bot/tools/github_pull_request_diff.rb index e5973cdc..5fa8ca4b 100644 --- a/lib/ai_bot/tools/github_pull_request_diff.rb +++ b/lib/ai_bot/tools/github_pull_request_diff.rb @@ -63,7 +63,7 @@ module DiscourseAi if response_code == "200" diff = body - diff = sort_and_shorten_diff(diff) + diff = self.class.sort_and_shorten_diff(diff) diff = truncate(diff, max_length: 20_000, percent_length: 0.3, llm: llm) { diff: diff } else @@ -75,14 +75,12 @@ module DiscourseAi { repo: repo, pull_id: pull_id, url: url } end - private - - def sort_and_shorten_diff(diff, threshold: LARGE_OBJECT_THRESHOLD) + def self.sort_and_shorten_diff(diff, threshold: LARGE_OBJECT_THRESHOLD) # This regex matches the start of a new file in the diff, # capturing the file paths for later use. file_start_regex = /^diff --git.*/ - prev_start = 0 + prev_start = -1 prev_match = nil split = [] @@ -90,9 +88,8 @@ module DiscourseAi diff.scan(file_start_regex) do |match| match_start = $~.offset(0)[0] # Get the start position of this match - if prev_start != 0 + if prev_start != -1 full_diff = diff[prev_start...match_start] - split << [prev_match, full_diff] end diff --git a/lib/completions/dialects/llama2_classic.rb b/lib/completions/dialects/llama2_classic.rb index d5510719..3b5675f1 100644 --- a/lib/completions/dialects/llama2_classic.rb +++ b/lib/completions/dialects/llama2_classic.rb @@ -33,7 +33,7 @@ module DiscourseAi elsif msg[:type] == :model memo << "\n#{msg[:content]}" elsif msg[:type] == :tool - tool = JSON.parse(msg[:content], symbolize_names: true) + JSON.parse(msg[:content], symbolize_names: true) memo << "\n[INST]\n" memo << (<<~TEXT).strip diff --git a/spec/lib/completions/dialects/llama2_classic_spec.rb b/spec/lib/completions/dialects/llama2_classic_spec.rb index 30a40bbf..0242ebf6 100644 --- a/spec/lib/completions/dialects/llama2_classic_spec.rb +++ b/spec/lib/completions/dialects/llama2_classic_spec.rb @@ -12,7 +12,7 @@ RSpec.describe DiscourseAi::Completions::Dialects::Llama2Classic do [INST] <> #{context.system_insts} - #{described_class.tool_preamble} + #{described_class.tool_preamble(include_array_tip: false)} #{context.dialect_tools} <> @@ -30,7 +30,7 @@ RSpec.describe DiscourseAi::Completions::Dialects::Llama2Classic do [INST] <> #{context.system_insts} - #{described_class.tool_preamble} + #{described_class.tool_preamble(include_array_tip: false)} #{context.dialect_tools} <> diff --git a/spec/lib/completions/dialects/mixtral_spec.rb b/spec/lib/completions/dialects/mixtral_spec.rb index 533d3954..499dad73 100644 --- a/spec/lib/completions/dialects/mixtral_spec.rb +++ b/spec/lib/completions/dialects/mixtral_spec.rb @@ -11,7 +11,7 @@ RSpec.describe DiscourseAi::Completions::Dialects::Mixtral do llama2_classic_version = <<~TEXT [INST] #{context.system_insts} - #{described_class.tool_preamble} + #{described_class.tool_preamble(include_array_tip: false)} #{context.dialect_tools} [/INST] Ok @@ -27,7 +27,7 @@ RSpec.describe DiscourseAi::Completions::Dialects::Mixtral do expected = +(<<~TEXT).strip [INST] #{context.system_insts} - #{described_class.tool_preamble} + #{described_class.tool_preamble(include_array_tip: false)} #{context.dialect_tools} [/INST] Ok diff --git a/spec/lib/completions/dialects/orca_style_spec.rb b/spec/lib/completions/dialects/orca_style_spec.rb index 5513f184..63b414b8 100644 --- a/spec/lib/completions/dialects/orca_style_spec.rb +++ b/spec/lib/completions/dialects/orca_style_spec.rb @@ -11,7 +11,7 @@ RSpec.describe DiscourseAi::Completions::Dialects::OrcaStyle do llama2_classic_version = <<~TEXT ### System: #{context.system_insts} - #{described_class.tool_preamble} + #{described_class.tool_preamble(include_array_tip: false)} #{context.dialect_tools} ### User: @@ -28,7 +28,7 @@ RSpec.describe DiscourseAi::Completions::Dialects::OrcaStyle do expected = +(<<~TEXT) ### System: #{context.system_insts} - #{described_class.tool_preamble} + #{described_class.tool_preamble(include_array_tip: false)} #{context.dialect_tools} ### User: diff --git a/spec/lib/modules/ai_bot/tools/github_pull_request_diff_spec.rb b/spec/lib/modules/ai_bot/tools/github_pull_request_diff_spec.rb index ae896056..0399108b 100644 --- a/spec/lib/modules/ai_bot/tools/github_pull_request_diff_spec.rb +++ b/spec/lib/modules/ai_bot/tools/github_pull_request_diff_spec.rb @@ -7,6 +7,43 @@ RSpec.describe DiscourseAi::AiBot::Tools::GithubPullRequestDiff do let(:bot_user) { Fabricate(:user) } let(:llm) { DiscourseAi::Completions::Llm.proxy("open_ai:gpt-4") } + context "with #sort_and_shorten_diff" do + it "sorts and shortens the diff without dropping data" do + diff = <<~DIFF + diff --git a/src/lib.rs b/src/lib.rs + index b466edd..66b068f 100644 + --- a/src/lib.rs + +++ b/src/lib.rs + this is a longer diff + this is a longer diff + this is a longer diff + diff --git a/tests/test_encoding.py b/tests/test_encoding.py + index 27b2192..9f31319 100644 + --- a/tests/test_encoding.py + +++ b/tests/test_encoding.py + short diff + DIFF + + sorted_diff = described_class.sort_and_shorten_diff(diff) + + expect(sorted_diff).to eq(<<~DIFF) + diff --git a/tests/test_encoding.py b/tests/test_encoding.py + index 27b2192..9f31319 100644 + --- a/tests/test_encoding.py + +++ b/tests/test_encoding.py + short diff + + diff --git a/src/lib.rs b/src/lib.rs + index b466edd..66b068f 100644 + --- a/src/lib.rs + +++ b/src/lib.rs + this is a longer diff + this is a longer diff + this is a longer diff + DIFF + end + end + context "with a valid pull request" do let(:repo) { "discourse/discourse-automation" } let(:pull_id) { 253 }