mirror of
https://github.com/discourse/discourse-ai.git
synced 2025-06-25 17:12:16 +00:00
DEV: improve artifact editing and eval system (#1130)
- Add non-contiguous search/replace support using ... syntax - Add judge support for evaluating LLM outputs with ratings - Improve error handling and reporting in eval runner - Add full section replacement support without search blocks - Add fabricators and specs for artifact diffing - Track failed searches to improve debugging - Add JS syntax validation for artifact versions in eval system - Update prompt documentation with clear guidelines * improve eval output * move error handling * llm as a judge * fix spec * small note on evals
This commit is contained in:
parent
02f0908963
commit
0c9466059c
23
README.md
23
README.md
@ -3,3 +3,26 @@
|
||||
**Plugin Summary**
|
||||
|
||||
For more information, please see: https://meta.discourse.org/t/discourse-ai/259214?u=falco
|
||||
|
||||
### Evals
|
||||
|
||||
The directory `evals` contains AI evals for the Discourse AI plugin.
|
||||
|
||||
To run them use:
|
||||
|
||||
cd evals
|
||||
./run --help
|
||||
|
||||
```
|
||||
Usage: evals/run [options]
|
||||
-e, --eval NAME Name of the evaluation to run
|
||||
--list-models List models
|
||||
-m, --model NAME Model to evaluate (will eval all models if not specified)
|
||||
-l, --list List evals
|
||||
```
|
||||
|
||||
To run evals you will need to configure API keys in your environment:
|
||||
|
||||
OPENAI_API_KEY=your_openai_api_key
|
||||
ANTHROPIC_API_KEY=your_anthropic_api_key
|
||||
GEMINI_API_KEY=your_gemini_api_key
|
||||
|
@ -4,6 +4,19 @@ class AiArtifactVersion < ActiveRecord::Base
|
||||
validates :html, length: { maximum: 65_535 }
|
||||
validates :css, length: { maximum: 65_535 }
|
||||
validates :js, length: { maximum: 65_535 }
|
||||
|
||||
# used when generating test cases
|
||||
def write_to(path)
|
||||
css_path = "#{path}/main.css"
|
||||
html_path = "#{path}/main.html"
|
||||
js_path = "#{path}/main.js"
|
||||
instructions_path = "#{path}/instructions.txt"
|
||||
|
||||
File.write(css_path, css)
|
||||
File.write(html_path, html)
|
||||
File.write(js_path, js)
|
||||
File.write(instructions_path, change_description)
|
||||
end
|
||||
end
|
||||
|
||||
# == Schema Information
|
||||
|
@ -10,7 +10,17 @@ class DiscourseAi::Evals::Eval
|
||||
:vision,
|
||||
:expected_output,
|
||||
:expected_output_regex,
|
||||
:expected_tool_call
|
||||
:expected_tool_call,
|
||||
:judge
|
||||
|
||||
class EvalError < StandardError
|
||||
attr_reader :context
|
||||
|
||||
def initialize(message, context)
|
||||
super(message)
|
||||
@context = context
|
||||
end
|
||||
end
|
||||
|
||||
def initialize(path:)
|
||||
@yaml = YAML.load_file(path).symbolize_keys
|
||||
@ -27,10 +37,14 @@ class DiscourseAi::Evals::Eval
|
||||
Regexp.new(@expected_output_regex, Regexp::MULTILINE) if @expected_output_regex
|
||||
@expected_tool_call = @yaml[:expected_tool_call]
|
||||
@expected_tool_call.symbolize_keys! if @expected_tool_call
|
||||
@judge = @yaml[:judge]
|
||||
@judge.symbolize_keys! if @judge
|
||||
|
||||
@args[:path] = File.expand_path(File.join(File.dirname(path), @args[:path])) if @args&.key?(
|
||||
:path,
|
||||
)
|
||||
@args.each do |key, value|
|
||||
if (key.to_s.include?("_path") || key.to_s == "path") && value.is_a?(String)
|
||||
@args[key] = File.expand_path(File.join(File.dirname(path), value))
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
def run(llm:)
|
||||
@ -44,6 +58,8 @@ class DiscourseAi::Evals::Eval
|
||||
image_to_text(llm, **args)
|
||||
when "prompt"
|
||||
prompt_call(llm, **args)
|
||||
when "edit_artifact"
|
||||
edit_artifact(llm, **args)
|
||||
end
|
||||
|
||||
if expected_output
|
||||
@ -53,7 +69,7 @@ class DiscourseAi::Evals::Eval
|
||||
{ result: :fail, expected_output: expected_output, actual_output: result }
|
||||
end
|
||||
elsif expected_output_regex
|
||||
if result.match?(expected_output_regex)
|
||||
if result.to_s.match?(expected_output_regex)
|
||||
{ result: :pass }
|
||||
else
|
||||
{ result: :fail, expected_output: expected_output_regex, actual_output: result }
|
||||
@ -71,9 +87,13 @@ class DiscourseAi::Evals::Eval
|
||||
else
|
||||
{ result: :pass }
|
||||
end
|
||||
elsif judge
|
||||
judge_result(result)
|
||||
else
|
||||
{ result: :unknown, actual_output: result }
|
||||
{ result: :pass }
|
||||
end
|
||||
rescue EvalError => e
|
||||
{ result: :fail, message: e.message, context: e.context }
|
||||
end
|
||||
|
||||
def print
|
||||
@ -96,14 +116,68 @@ class DiscourseAi::Evals::Eval
|
||||
|
||||
private
|
||||
|
||||
def helper(llm, input:, name:)
|
||||
def judge_result(result)
|
||||
prompt = judge[:prompt].dup
|
||||
prompt.sub!("{{output}}", result)
|
||||
prompt.sub!("{{input}}", args[:input])
|
||||
|
||||
prompt += <<~SUFFIX
|
||||
|
||||
Reply with a rating from 1 to 10, where 10 is perfect and 1 is terrible.
|
||||
|
||||
example output:
|
||||
|
||||
[RATING]10[/RATING] perfect output
|
||||
|
||||
example output:
|
||||
|
||||
[RATING]5[/RATING]
|
||||
|
||||
the following failed to preserve... etc...
|
||||
SUFFIX
|
||||
|
||||
judge_llm = DiscourseAi::Evals::Llm.choose(judge[:llm]).first
|
||||
|
||||
DiscourseAi::Completions::Prompt.new(
|
||||
"You are an expert judge tasked at testing LLM outputs.",
|
||||
messages: [{ type: :user, content: prompt }],
|
||||
)
|
||||
|
||||
result = judge_llm.llm_model.to_llm.generate(prompt, user: Discourse.system_user)
|
||||
|
||||
if rating = result.match(%r{\[RATING\](\d+)\[/RATING\]})
|
||||
rating = rating[1].to_i
|
||||
end
|
||||
|
||||
if rating.to_i >= judge[:pass_rating]
|
||||
{ result: :pass }
|
||||
else
|
||||
{
|
||||
result: :fail,
|
||||
message: "LLM Rating below threshold, it was #{rating}, expecting #{judge[:pass_rating]}",
|
||||
context: result,
|
||||
}
|
||||
end
|
||||
end
|
||||
|
||||
def helper(llm, input:, name:, locale: nil)
|
||||
completion_prompt = CompletionPrompt.find_by(name: name)
|
||||
helper = DiscourseAi::AiHelper::Assistant.new(helper_llm: llm.llm_proxy)
|
||||
user = Discourse.system_user
|
||||
if locale
|
||||
user = User.new
|
||||
class << user
|
||||
attr_accessor :effective_locale
|
||||
end
|
||||
|
||||
user.effective_locale = locale
|
||||
user.admin = true
|
||||
end
|
||||
result =
|
||||
helper.generate_and_send_prompt(
|
||||
completion_prompt,
|
||||
input,
|
||||
current_user = Discourse.system_user,
|
||||
current_user = user,
|
||||
_force_default_locale = false,
|
||||
)
|
||||
|
||||
@ -169,4 +243,73 @@ class DiscourseAi::Evals::Eval
|
||||
end
|
||||
result
|
||||
end
|
||||
|
||||
def edit_artifact(llm, css_path:, js_path:, html_path:, instructions_path:)
|
||||
css = File.read(css_path)
|
||||
js = File.read(js_path)
|
||||
html = File.read(html_path)
|
||||
instructions = File.read(instructions_path)
|
||||
artifact =
|
||||
AiArtifact.create!(
|
||||
css: css,
|
||||
js: js,
|
||||
html: html,
|
||||
user_id: Discourse.system_user.id,
|
||||
post_id: 1,
|
||||
name: "eval artifact",
|
||||
)
|
||||
|
||||
post = Post.new(topic_id: 1, id: 1)
|
||||
diff =
|
||||
DiscourseAi::AiBot::ArtifactUpdateStrategies::Diff.new(
|
||||
llm: llm.llm_model.to_llm,
|
||||
post: post,
|
||||
user: Discourse.system_user,
|
||||
artifact: artifact,
|
||||
artifact_version: nil,
|
||||
instructions: instructions,
|
||||
)
|
||||
diff.apply
|
||||
|
||||
if diff.failed_searches.present?
|
||||
puts "Eval Errors encountered"
|
||||
p diff.failed_searches
|
||||
raise EvalError.new("Failed to apply all changes", diff.failed_searches)
|
||||
end
|
||||
|
||||
version = artifact.versions.last
|
||||
raise EvalError.new("Invalid JS", version.js) if !valid_javascript?(version.js)
|
||||
|
||||
output = { css: version.css, js: version.js, html: version.html }
|
||||
|
||||
artifact.destroy
|
||||
output
|
||||
end
|
||||
|
||||
def valid_javascript?(str)
|
||||
require "open3"
|
||||
|
||||
# Create a temporary file with the JavaScript code
|
||||
Tempfile.create(%w[test .js]) do |f|
|
||||
f.write(str)
|
||||
f.flush
|
||||
|
||||
File.write("/tmp/test.js", str)
|
||||
|
||||
begin
|
||||
Discourse::Utils.execute_command(
|
||||
"node",
|
||||
"--check",
|
||||
f.path,
|
||||
failure_message: "Invalid JavaScript syntax",
|
||||
timeout: 30, # reasonable timeout in seconds
|
||||
)
|
||||
true
|
||||
rescue Discourse::Utils::CommandError
|
||||
false
|
||||
end
|
||||
end
|
||||
rescue StandardError
|
||||
false
|
||||
end
|
||||
end
|
||||
|
@ -155,9 +155,18 @@ class DiscourseAi::Evals::Runner
|
||||
|
||||
if result[:result] == :fail
|
||||
puts "Failed 🔴"
|
||||
puts "---- Expected ----\n#{result[:expected_output]}"
|
||||
puts "---- Actual ----\n#{result[:actual_output]}"
|
||||
puts "Error: #{result[:message]}" if result[:message]
|
||||
# this is deliberate, it creates a lot of noise, but sometimes for debugging it's useful
|
||||
#puts "Context: #{result[:context].to_s[0..2000]}" if result[:context]
|
||||
if result[:expected_output] && result[:actual_output]
|
||||
puts "---- Expected ----\n#{result[:expected_output]}"
|
||||
puts "---- Actual ----\n#{result[:actual_output]}"
|
||||
end
|
||||
logger.error("Evaluation failed with LLM: #{llm.name}")
|
||||
logger.error("Error: #{result[:message]}") if result[:message]
|
||||
logger.error("Expected: #{result[:expected_output]}") if result[:expected_output]
|
||||
logger.error("Actual: #{result[:actual_output]}") if result[:actual_output]
|
||||
logger.error("Context: #{result[:context]}") if result[:context]
|
||||
elsif result[:result] == :pass
|
||||
puts "Passed 🟢"
|
||||
logger.info("Evaluation passed with LLM: #{llm.name}")
|
||||
|
@ -3,8 +3,15 @@ module DiscourseAi
|
||||
module AiBot
|
||||
module ArtifactUpdateStrategies
|
||||
class Diff < Base
|
||||
attr_reader :failed_searches
|
||||
|
||||
private
|
||||
|
||||
def initialize(**kwargs)
|
||||
super
|
||||
@failed_searches = []
|
||||
end
|
||||
|
||||
def build_prompt
|
||||
DiscourseAi::Completions::Prompt.new(
|
||||
system_prompt,
|
||||
@ -51,15 +58,21 @@ module DiscourseAi
|
||||
content = source.public_send(section == :javascript ? :js : section)
|
||||
blocks.each do |block|
|
||||
begin
|
||||
content =
|
||||
DiscourseAi::Utils::DiffUtils::SimpleDiff.apply(
|
||||
content,
|
||||
block[:search],
|
||||
block[:replace],
|
||||
)
|
||||
if !block[:search]
|
||||
content = block[:replace]
|
||||
else
|
||||
content =
|
||||
DiscourseAi::Utils::DiffUtils::SimpleDiff.apply(
|
||||
content,
|
||||
block[:search],
|
||||
block[:replace],
|
||||
)
|
||||
end
|
||||
rescue DiscourseAi::Utils::DiffUtils::SimpleDiff::NoMatchError
|
||||
@failed_searches << { section: section, search: block[:search] }
|
||||
# TODO, we may need to inform caller here, LLM made a mistake which it
|
||||
# should correct
|
||||
puts "Failed to find search: #{block[:search]}"
|
||||
end
|
||||
end
|
||||
updated_content[section == :javascript ? :js : section] = content
|
||||
@ -76,7 +89,8 @@ module DiscourseAi
|
||||
private
|
||||
|
||||
def extract_search_replace_blocks(content)
|
||||
return nil if content.blank?
|
||||
return nil if content.blank? || content.to_s.strip.downcase.match?(/^\(?no changes?\)?$/m)
|
||||
return [{ replace: content }] if !content.match?(/<<+\s*SEARCH/)
|
||||
|
||||
blocks = []
|
||||
remaining = content
|
||||
@ -98,29 +112,35 @@ module DiscourseAi
|
||||
|
||||
1. Use EXACTLY this format for changes:
|
||||
<<<<<<< SEARCH
|
||||
(exact code to find)
|
||||
(first line of code to replace)
|
||||
(other lines of code to avoid ambiguity)
|
||||
(last line of code to replace)
|
||||
=======
|
||||
(replacement code)
|
||||
>>>>>>> REPLACE
|
||||
2. DO NOT modify the markers or add spaces around them
|
||||
3. DO NOT add explanations or comments within sections
|
||||
4. ONLY include [HTML], [CSS], and [JavaScript] sections if they have changes
|
||||
5. Ensure search text matches EXACTLY - partial matches will fail
|
||||
6. Keep changes minimal and focused
|
||||
7. HTML should not include <html>, <head>, or <body> tags, it is injected into a template
|
||||
5. HTML should not include <html>, <head>, or <body> tags, it is injected into a template
|
||||
6. When specifying a SEARCH block, ALWAYS keep it 8 lines or less, you will be interrupted and a retry will be required if you exceed this limit
|
||||
7. NEVER EVER ask followup questions, ALL changes must be performed in a single response, you are consumed via an API, there is no opportunity for humans in the loop
|
||||
8. When performing a non-contiguous search, ALWAYS use ... to denote the skipped lines
|
||||
9. Be mindful that ... non-contiguous search is not greedy, the following line will only match the first occurrence of the search block
|
||||
10. Never mix a full section replacement with a search/replace block in the same section
|
||||
11. ALWAYS skip sections you to not want to change, do not include them in the response
|
||||
|
||||
JavaScript libraries must be sourced from the following CDNs, otherwise CSP will reject it:
|
||||
#{AiArtifact::ALLOWED_CDN_SOURCES.join("\n")}
|
||||
|
||||
Reply Format:
|
||||
[HTML]
|
||||
(changes or empty if no changes)
|
||||
(changes or empty if no changes or entire HTML)
|
||||
[/HTML]
|
||||
[CSS]
|
||||
(changes or empty if no changes)
|
||||
(changes or empty if no changes or entire CSS)
|
||||
[/CSS]
|
||||
[JavaScript]
|
||||
(changes or empty if no changes)
|
||||
(changes or empty if no changes or entire JavaScript)
|
||||
[/JavaScript]
|
||||
|
||||
Example - Multiple changes in one file:
|
||||
@ -152,6 +172,68 @@ module DiscourseAi
|
||||
.text { font-size: 16px; }
|
||||
>>>>>>> REPLACE
|
||||
[/CSS]
|
||||
|
||||
Example - Non contiguous search in CSS (replace most CSS with new CSS)
|
||||
|
||||
Original CSS:
|
||||
|
||||
[CSS]
|
||||
body {
|
||||
color: red;
|
||||
}
|
||||
.button {
|
||||
color: blue;
|
||||
}
|
||||
.alert {
|
||||
background-color: green;
|
||||
}
|
||||
.alert2 {
|
||||
background-color: green;
|
||||
}
|
||||
[/CSS]
|
||||
|
||||
[CSS]
|
||||
<<<<<<< SEARCH
|
||||
body {
|
||||
...
|
||||
background-color: green;
|
||||
}
|
||||
=======
|
||||
body {
|
||||
color: red;
|
||||
}
|
||||
>>>>>>> REPLACE
|
||||
|
||||
RESULT:
|
||||
|
||||
[CSS]
|
||||
body {
|
||||
color: red;
|
||||
}
|
||||
.alert2 {
|
||||
background-color: green;
|
||||
}
|
||||
[/CSS]
|
||||
|
||||
Example - full HTML replacement:
|
||||
|
||||
[HTML]
|
||||
<div>something old</div>
|
||||
<div>another somethin old</div>
|
||||
[/HTML]
|
||||
|
||||
output:
|
||||
|
||||
[HTML]
|
||||
<div>something new</div>
|
||||
[/HTML]
|
||||
|
||||
result:
|
||||
[HTML]
|
||||
<div>something new</div>
|
||||
[/HTML]
|
||||
|
||||
|
||||
PROMPT
|
||||
end
|
||||
|
||||
|
@ -26,6 +26,8 @@ module DiscourseAi
|
||||
lines = content.split("\n")
|
||||
search_lines = search.split("\n")
|
||||
|
||||
### TODO implement me
|
||||
|
||||
# 1. Try exact matching
|
||||
match_positions =
|
||||
find_matches(lines, search_lines) { |line, search_line| line == search_line }
|
||||
@ -38,7 +40,17 @@ module DiscourseAi
|
||||
end
|
||||
end
|
||||
|
||||
# 3. Try fuzzy matching
|
||||
# 3. Try non-contiguous line based stripped matching
|
||||
if match_positions.empty?
|
||||
if range = non_contiguous_match_range(lines, search_lines)
|
||||
first_match, last_match = range
|
||||
lines.slice!(first_match, last_match - first_match + 1)
|
||||
lines.insert(first_match, *replace.split("\n"))
|
||||
return lines.join("\n")
|
||||
end
|
||||
end
|
||||
|
||||
# 4. Try fuzzy matching
|
||||
if match_positions.empty?
|
||||
match_positions =
|
||||
find_matches(lines, search_lines) do |line, search_line|
|
||||
@ -46,7 +58,7 @@ module DiscourseAi
|
||||
end
|
||||
end
|
||||
|
||||
# 4. Try block matching as last resort
|
||||
# 5. Try block matching as last resort
|
||||
if match_positions.empty?
|
||||
if block_matches = find_block_matches(content, search)
|
||||
return replace_blocks(content, block_matches, replace)
|
||||
@ -68,6 +80,27 @@ module DiscourseAi
|
||||
|
||||
private
|
||||
|
||||
def non_contiguous_match_range(lines, search_lines)
|
||||
first_idx = nil
|
||||
last_idx = nil
|
||||
search_index = 0
|
||||
|
||||
lines.each_with_index do |line, idx|
|
||||
if search_lines[search_index].strip == "..."
|
||||
search_index += 1
|
||||
break if search_lines[search_index].nil?
|
||||
end
|
||||
if line.strip == search_lines[search_index].strip
|
||||
first_idx ||= idx
|
||||
last_idx = idx
|
||||
search_index += 1
|
||||
return first_idx, last_idx if search_index == search_lines.length
|
||||
end
|
||||
end
|
||||
|
||||
nil
|
||||
end
|
||||
|
||||
def find_matches(lines, search_lines)
|
||||
matches = []
|
||||
max_index = lines.length - search_lines.length
|
||||
|
19
spec/fabricators/ai_artifact_fabricator.rb
Normal file
19
spec/fabricators/ai_artifact_fabricator.rb
Normal file
@ -0,0 +1,19 @@
|
||||
# frozen_string_literal: true
|
||||
Fabricator(:ai_artifact) do
|
||||
user
|
||||
post
|
||||
name { sequence(:name) { |i| "artifact_#{i}" } }
|
||||
html { "<div>Test Content</div>" }
|
||||
css { ".test { color: blue; }" }
|
||||
js { "console.log('test');" }
|
||||
metadata { { public: false } }
|
||||
end
|
||||
|
||||
Fabricator(:ai_artifact_version) do
|
||||
ai_artifact
|
||||
version_number { sequence(:version_number) { |i| i } }
|
||||
html { "<div>Version Content</div>" }
|
||||
css { ".version { color: red; }" }
|
||||
js { "console.log('version');" }
|
||||
change_description { "Test change" }
|
||||
end
|
214
spec/lib/modules/ai_bot/artifact_update_strategies/diff_spec.rb
Normal file
214
spec/lib/modules/ai_bot/artifact_update_strategies/diff_spec.rb
Normal file
@ -0,0 +1,214 @@
|
||||
# frozen_string_literal: true
|
||||
|
||||
RSpec.describe DiscourseAi::AiBot::ArtifactUpdateStrategies::Diff do
|
||||
fab!(:user)
|
||||
fab!(:post)
|
||||
fab!(:artifact) { Fabricate(:ai_artifact) }
|
||||
fab!(:llm_model)
|
||||
|
||||
let(:llm) { llm_model.to_llm }
|
||||
let(:instructions) { "Update the button color to red" }
|
||||
|
||||
let(:strategy) do
|
||||
described_class.new(
|
||||
llm: llm,
|
||||
post: post,
|
||||
user: user,
|
||||
artifact: artifact,
|
||||
artifact_version: nil,
|
||||
instructions: instructions,
|
||||
)
|
||||
end
|
||||
|
||||
describe "#apply" do
|
||||
it "processes simple search/replace blocks" do
|
||||
original_css = ".button { color: blue; }"
|
||||
artifact.update!(css: original_css)
|
||||
|
||||
response = <<~RESPONSE
|
||||
[CSS]
|
||||
<<<<<<< SEARCH
|
||||
.button { color: blue; }
|
||||
=======
|
||||
.button { color: red; }
|
||||
>>>>>>> REPLACE
|
||||
[/CSS]
|
||||
RESPONSE
|
||||
|
||||
DiscourseAi::Completions::Llm.with_prepared_responses([response]) { strategy.apply }
|
||||
|
||||
expect(artifact.versions.last.css).to eq(".button { color: red; }")
|
||||
end
|
||||
|
||||
it "handles multiple search/replace blocks in the same section" do
|
||||
original_css = <<~CSS
|
||||
.button { color: blue; }
|
||||
.text { font-size: 12px; }
|
||||
CSS
|
||||
|
||||
artifact.update!(css: original_css)
|
||||
|
||||
response = <<~RESPONSE
|
||||
[CSS]
|
||||
<<<<<<< SEARCH
|
||||
.button { color: blue; }
|
||||
=======
|
||||
.button { color: red; }
|
||||
>>>>>>> REPLACE
|
||||
<<<<<<< SEARCH
|
||||
.text { font-size: 12px; }
|
||||
=======
|
||||
.text { font-size: 16px; }
|
||||
>>>>>>> REPLACE
|
||||
[/CSS]
|
||||
RESPONSE
|
||||
|
||||
DiscourseAi::Completions::Llm.with_prepared_responses([response]) { strategy.apply }
|
||||
|
||||
expected = <<~CSS.strip
|
||||
.button { color: red; }
|
||||
.text { font-size: 16px; }
|
||||
CSS
|
||||
|
||||
expect(artifact.versions.last.css.strip).to eq(expected.strip)
|
||||
end
|
||||
|
||||
it "handles non-contiguous search/replace using ..." do
|
||||
original_css = <<~CSS
|
||||
body {
|
||||
color: red;
|
||||
}
|
||||
.button {
|
||||
color: blue;
|
||||
}
|
||||
.alert {
|
||||
background-color: green;
|
||||
}
|
||||
CSS
|
||||
|
||||
artifact.update!(css: original_css)
|
||||
|
||||
response = <<~RESPONSE
|
||||
[CSS]
|
||||
<<<<<<< SEARCH
|
||||
body {
|
||||
...
|
||||
background-color: green;
|
||||
}
|
||||
=======
|
||||
body {
|
||||
color: red;
|
||||
}
|
||||
>>>>>>> REPLACE
|
||||
[/CSS]
|
||||
RESPONSE
|
||||
|
||||
DiscourseAi::Completions::Llm.with_prepared_responses([response]) { strategy.apply }
|
||||
|
||||
expect(artifact.versions.last.css).to eq("body {\n color: red;\n}")
|
||||
end
|
||||
|
||||
it "tracks failed searches" do
|
||||
original_css = ".button { color: blue; }"
|
||||
artifact.update!(css: original_css)
|
||||
|
||||
response = <<~RESPONSE
|
||||
[CSS]
|
||||
<<<<<<< SEARCH
|
||||
.button { color: green; }
|
||||
=======
|
||||
.button { color: red; }
|
||||
>>>>>>> REPLACE
|
||||
[/CSS]
|
||||
RESPONSE
|
||||
|
||||
DiscourseAi::Completions::Llm.with_prepared_responses([response]) { strategy.apply }
|
||||
|
||||
expect(strategy.failed_searches).to contain_exactly(
|
||||
{ section: :css, search: ".button { color: green; }" },
|
||||
)
|
||||
expect(artifact.versions.last.css).to eq(original_css)
|
||||
end
|
||||
|
||||
it "handles complete section replacements" do
|
||||
original_html = "<div>old content</div>"
|
||||
artifact.update!(html: original_html)
|
||||
|
||||
response = <<~RESPONSE
|
||||
[HTML]
|
||||
<div>new content</div>
|
||||
[/HTML]
|
||||
RESPONSE
|
||||
|
||||
DiscourseAi::Completions::Llm.with_prepared_responses([response]) { strategy.apply }
|
||||
|
||||
expect(artifact.versions.last.html.strip).to eq("<div>new content</div>")
|
||||
end
|
||||
|
||||
it "ignores empty or 'no changes' sections part 1" do
|
||||
original = {
|
||||
html: "<div>content</div>",
|
||||
css: ".button { color: blue; }",
|
||||
js: "console.log('test');",
|
||||
}
|
||||
|
||||
artifact.update!(html: original[:html], css: original[:css], js: original[:js])
|
||||
|
||||
response = <<~RESPONSE
|
||||
[HTML]
|
||||
no changes
|
||||
[/HTML]
|
||||
[CSS]
|
||||
(NO CHANGES)
|
||||
[/CSS]
|
||||
[JavaScript]
|
||||
<<<<<<< SEARCH
|
||||
console.log('test');
|
||||
=======
|
||||
console.log('(no changes)');
|
||||
>>>>>>> REPLACE
|
||||
[/JavaScript]
|
||||
RESPONSE
|
||||
|
||||
DiscourseAi::Completions::Llm.with_prepared_responses([response]) { strategy.apply }
|
||||
|
||||
version = artifact.versions.last
|
||||
expect(version.html).to eq(original[:html])
|
||||
expect(version.css).to eq(original[:css])
|
||||
expect(version.js).to eq("console.log('(no changes)');")
|
||||
end
|
||||
|
||||
it "ignores empty or 'no changes' section part 2" do
|
||||
original = {
|
||||
html: "<div>content</div>",
|
||||
css: ".button { color: blue; }",
|
||||
js: "console.log('test');",
|
||||
}
|
||||
|
||||
artifact.update!(html: original[:html], css: original[:css], js: original[:js])
|
||||
|
||||
response = <<~RESPONSE
|
||||
[HTML]
|
||||
(no changes)
|
||||
[/HTML]
|
||||
[CSS]
|
||||
|
||||
[/CSS]
|
||||
[JavaScript]
|
||||
<<<<<<< SEARCH
|
||||
console.log('test');
|
||||
=======
|
||||
console.log('updated');
|
||||
>>>>>>> REPLACE
|
||||
[/JavaScript]
|
||||
RESPONSE
|
||||
|
||||
DiscourseAi::Completions::Llm.with_prepared_responses([response]) { strategy.apply }
|
||||
|
||||
version = artifact.versions.last
|
||||
expect(version.html).to eq(original[:html])
|
||||
expect(version.css).to eq(original[:css])
|
||||
expect(version.js).to eq("console.log('updated');")
|
||||
end
|
||||
end
|
||||
end
|
@ -262,7 +262,7 @@ RSpec.describe DiscourseAi::AiBot::Tools::UpdateArtifact do
|
||||
[/CSS]
|
||||
|
||||
[JavaScript]
|
||||
nothing to do
|
||||
no changes
|
||||
[/JavaScript]
|
||||
|
||||
LLMs like to say nonsense that we can ignore here as well
|
||||
|
@ -171,5 +171,33 @@ RSpec.describe DiscourseAi::Utils::DiffUtils::SimpleDiff do
|
||||
|
||||
expect(subject.apply(content, search, replace).strip).to eq(expected.strip)
|
||||
end
|
||||
|
||||
it "handles missing lines in search" do
|
||||
original = <<~TEXT
|
||||
line1
|
||||
line2
|
||||
line3
|
||||
line4
|
||||
line5
|
||||
line1
|
||||
line2
|
||||
TEXT
|
||||
|
||||
search = <<~TEXT
|
||||
line1
|
||||
...
|
||||
line3
|
||||
...
|
||||
line1
|
||||
TEXT
|
||||
|
||||
replace = ""
|
||||
|
||||
expected = <<~TEXT
|
||||
line2
|
||||
TEXT
|
||||
|
||||
expect(subject.apply(original, search, replace).strip).to eq(expected.strip)
|
||||
end
|
||||
end
|
||||
end
|
||||
|
Loading…
x
Reference in New Issue
Block a user