diff --git a/lib/ai_bot/tools/github_pull_request_diff.rb b/lib/ai_bot/tools/github_pull_request_diff.rb
index e5973cdc..5fa8ca4b 100644
--- a/lib/ai_bot/tools/github_pull_request_diff.rb
+++ b/lib/ai_bot/tools/github_pull_request_diff.rb
@@ -63,7 +63,7 @@ module DiscourseAi
if response_code == "200"
diff = body
- diff = sort_and_shorten_diff(diff)
+ diff = self.class.sort_and_shorten_diff(diff)
diff = truncate(diff, max_length: 20_000, percent_length: 0.3, llm: llm)
{ diff: diff }
else
@@ -75,14 +75,12 @@ module DiscourseAi
{ repo: repo, pull_id: pull_id, url: url }
end
- private
-
- def sort_and_shorten_diff(diff, threshold: LARGE_OBJECT_THRESHOLD)
+ def self.sort_and_shorten_diff(diff, threshold: LARGE_OBJECT_THRESHOLD)
# This regex matches the start of a new file in the diff,
# capturing the file paths for later use.
file_start_regex = /^diff --git.*/
- prev_start = 0
+ prev_start = -1
prev_match = nil
split = []
@@ -90,9 +88,8 @@ module DiscourseAi
diff.scan(file_start_regex) do |match|
match_start = $~.offset(0)[0] # Get the start position of this match
- if prev_start != 0
+ if prev_start != -1
full_diff = diff[prev_start...match_start]
-
split << [prev_match, full_diff]
end
diff --git a/lib/completions/dialects/llama2_classic.rb b/lib/completions/dialects/llama2_classic.rb
index d5510719..3b5675f1 100644
--- a/lib/completions/dialects/llama2_classic.rb
+++ b/lib/completions/dialects/llama2_classic.rb
@@ -33,7 +33,7 @@ module DiscourseAi
elsif msg[:type] == :model
memo << "\n#{msg[:content]}"
elsif msg[:type] == :tool
- tool = JSON.parse(msg[:content], symbolize_names: true)
+ JSON.parse(msg[:content], symbolize_names: true)
memo << "\n[INST]\n"
memo << (<<~TEXT).strip
diff --git a/spec/lib/completions/dialects/llama2_classic_spec.rb b/spec/lib/completions/dialects/llama2_classic_spec.rb
index 30a40bbf..0242ebf6 100644
--- a/spec/lib/completions/dialects/llama2_classic_spec.rb
+++ b/spec/lib/completions/dialects/llama2_classic_spec.rb
@@ -12,7 +12,7 @@ RSpec.describe DiscourseAi::Completions::Dialects::Llama2Classic do
[INST]
<>
#{context.system_insts}
- #{described_class.tool_preamble}
+ #{described_class.tool_preamble(include_array_tip: false)}
#{context.dialect_tools}
<>
@@ -30,7 +30,7 @@ RSpec.describe DiscourseAi::Completions::Dialects::Llama2Classic do
[INST]
<>
#{context.system_insts}
- #{described_class.tool_preamble}
+ #{described_class.tool_preamble(include_array_tip: false)}
#{context.dialect_tools}
<>
diff --git a/spec/lib/completions/dialects/mixtral_spec.rb b/spec/lib/completions/dialects/mixtral_spec.rb
index 533d3954..499dad73 100644
--- a/spec/lib/completions/dialects/mixtral_spec.rb
+++ b/spec/lib/completions/dialects/mixtral_spec.rb
@@ -11,7 +11,7 @@ RSpec.describe DiscourseAi::Completions::Dialects::Mixtral do
llama2_classic_version = <<~TEXT
[INST]
#{context.system_insts}
- #{described_class.tool_preamble}
+ #{described_class.tool_preamble(include_array_tip: false)}
#{context.dialect_tools}
[/INST] Ok
@@ -27,7 +27,7 @@ RSpec.describe DiscourseAi::Completions::Dialects::Mixtral do
expected = +(<<~TEXT).strip
[INST]
#{context.system_insts}
- #{described_class.tool_preamble}
+ #{described_class.tool_preamble(include_array_tip: false)}
#{context.dialect_tools}
[/INST] Ok
diff --git a/spec/lib/completions/dialects/orca_style_spec.rb b/spec/lib/completions/dialects/orca_style_spec.rb
index 5513f184..63b414b8 100644
--- a/spec/lib/completions/dialects/orca_style_spec.rb
+++ b/spec/lib/completions/dialects/orca_style_spec.rb
@@ -11,7 +11,7 @@ RSpec.describe DiscourseAi::Completions::Dialects::OrcaStyle do
llama2_classic_version = <<~TEXT
### System:
#{context.system_insts}
- #{described_class.tool_preamble}
+ #{described_class.tool_preamble(include_array_tip: false)}
#{context.dialect_tools}
### User:
@@ -28,7 +28,7 @@ RSpec.describe DiscourseAi::Completions::Dialects::OrcaStyle do
expected = +(<<~TEXT)
### System:
#{context.system_insts}
- #{described_class.tool_preamble}
+ #{described_class.tool_preamble(include_array_tip: false)}
#{context.dialect_tools}
### User:
diff --git a/spec/lib/modules/ai_bot/tools/github_pull_request_diff_spec.rb b/spec/lib/modules/ai_bot/tools/github_pull_request_diff_spec.rb
index ae896056..0399108b 100644
--- a/spec/lib/modules/ai_bot/tools/github_pull_request_diff_spec.rb
+++ b/spec/lib/modules/ai_bot/tools/github_pull_request_diff_spec.rb
@@ -7,6 +7,43 @@ RSpec.describe DiscourseAi::AiBot::Tools::GithubPullRequestDiff do
let(:bot_user) { Fabricate(:user) }
let(:llm) { DiscourseAi::Completions::Llm.proxy("open_ai:gpt-4") }
+ context "with #sort_and_shorten_diff" do
+ it "sorts and shortens the diff without dropping data" do
+ diff = <<~DIFF
+ diff --git a/src/lib.rs b/src/lib.rs
+ index b466edd..66b068f 100644
+ --- a/src/lib.rs
+ +++ b/src/lib.rs
+ this is a longer diff
+ this is a longer diff
+ this is a longer diff
+ diff --git a/tests/test_encoding.py b/tests/test_encoding.py
+ index 27b2192..9f31319 100644
+ --- a/tests/test_encoding.py
+ +++ b/tests/test_encoding.py
+ short diff
+ DIFF
+
+ sorted_diff = described_class.sort_and_shorten_diff(diff)
+
+ expect(sorted_diff).to eq(<<~DIFF)
+ diff --git a/tests/test_encoding.py b/tests/test_encoding.py
+ index 27b2192..9f31319 100644
+ --- a/tests/test_encoding.py
+ +++ b/tests/test_encoding.py
+ short diff
+
+ diff --git a/src/lib.rs b/src/lib.rs
+ index b466edd..66b068f 100644
+ --- a/src/lib.rs
+ +++ b/src/lib.rs
+ this is a longer diff
+ this is a longer diff
+ this is a longer diff
+ DIFF
+ end
+ end
+
context "with a valid pull request" do
let(:repo) { "discourse/discourse-automation" }
let(:pull_id) { 253 }