FEATURE: JavaScript evaluation tool (#630)

This is similar to ChatGPT's Code Interpreter, except that it uses
JavaScript as the execution engine.

Safeguards were added to constrain memory usage and to time out
long-running evaluations.
This commit is contained in:
Sam 2024-05-21 07:57:01 +10:00 committed by GitHub
parent 4d8d822351
commit 232f12eba6
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 180 additions and 0 deletions

View File

@ -234,6 +234,7 @@ en:
search_settings: "Searching site settings" search_settings: "Searching site settings"
dall_e: "Generate image" dall_e: "Generate image"
search_meta_discourse: "Search Meta Discourse" search_meta_discourse: "Search Meta Discourse"
javascript_evaluator: "Evaluate JavaScript"
command_help: command_help:
web_browser: "Browse web page using the AI Bot" web_browser: "Browse web page using the AI Bot"
github_search_code: "Search for code in a GitHub repository" github_search_code: "Search for code in a GitHub repository"
@ -253,6 +254,7 @@ en:
search_settings: "Search site settings" search_settings: "Search site settings"
dall_e: "Generate image using DALL-E 3" dall_e: "Generate image using DALL-E 3"
search_meta_discourse: "Search Meta Discourse" search_meta_discourse: "Search Meta Discourse"
javascript_evaluator: "Evaluate JavaScript"
command_description: command_description:
web_browser: "Reading <a href='%{url}'>%{url}</a>" web_browser: "Reading <a href='%{url}'>%{url}</a>"
github_search_code: "Searched for '%{query}' in %{repo}" github_search_code: "Searched for '%{query}' in %{repo}"

View File

@ -88,6 +88,7 @@ module DiscourseAi
Tools::GithubFileContent, Tools::GithubFileContent,
Tools::GithubPullRequestDiff, Tools::GithubPullRequestDiff,
Tools::WebBrowser, Tools::WebBrowser,
Tools::JavascriptEvaluator,
] ]
tools << Tools::GithubSearchCode if SiteSetting.ai_bot_github_access_token.present? tools << Tools::GithubSearchCode if SiteSetting.ai_bot_github_access_token.present?

View File

@ -0,0 +1,106 @@
# frozen_string_literal: true
require "mini_racer"
require "json"
module DiscourseAi
  module AiBot
    module Tools
      # AI bot tool that evaluates a JavaScript snippet inside a fresh
      # MiniRacer context, bounded by a timeout and a memory ceiling.
      #
      # `invoke` never lets a MiniRacer error escape: failures are reported
      # as `{ error: "..." }`, successes as `{ result: ... }`.
      class JavascriptEvaluator < Tool
        # Defaults passed to MiniRacer::Context.new. MiniRacer documents
        # `timeout` in milliseconds and `max_memory` in bytes.
        TIMEOUT = 500
        MAX_MEMORY = 10_000_000
        MARSHAL_STACK_DEPTH = 20

        # Per-instance overrides of the default limits (the specs use these
        # to force the timeout / out-of-memory paths quickly).
        attr_writer :timeout, :max_memory

        # Tool description advertised to the LLM: a single required
        # string parameter named "script".
        def self.signature
          {
            name: name,
            description: "Evaluates JavaScript code using MiniRacer",
            parameters: [
              {
                name: "script",
                description: "The JavaScript code to evaluate",
                type: "string",
                required: true,
              },
            ],
          }
        end

        # Identifier of this tool.
        def self.name
          "javascript_evaluator"
        end

        # The JavaScript source to run; a missing parameter becomes "".
        def script
          parameters[:script].to_s
        end

        # Effective evaluation timeout: the override if set, else TIMEOUT.
        def timeout
          @timeout || TIMEOUT
        end

        # Effective memory ceiling: the override if set, else MAX_MEMORY.
        def max_memory
          @max_memory || MAX_MEMORY
        end

        # Evaluates `script` and returns a hash with either :result (the
        # value of the script's last expression, truncated when its string
        # form is long) or :error (a human-readable failure message).
        def invoke
          context =
            MiniRacer::Context.new(
              timeout: timeout,
              # honour the per-instance override rather than always using
              # the MAX_MEMORY constant
              max_memory: max_memory,
              marshal_stack_depth: MARSHAL_STACK_DEPTH,
            )

          # works around llms like anthropic loving console.log
          eval_script = <<~JS
            let console = {};
            console.log = function(val) {
              return val;
            };
            #{script}
          JS

          result = context.eval(eval_script)

          # only do special handling and truncating for long strings
          if result.to_s.length > 1000
            result = truncate(result.to_s, max_length: 10_000, percent_length: 0.3, llm: llm)
          end

          { result: result }
        rescue MiniRacer::ScriptTerminatedError => e
          { error: "JavaScript execution timed out: #{e.message}" }
        rescue MiniRacer::V8OutOfMemoryError => e
          { error: "JavaScript execution exceeded memory limit: #{e.message}" }
        rescue MiniRacer::Error => e
          { error: "JavaScript execution error: #{e.message}" }
        ensure
          # Release the V8 context right away instead of waiting for Ruby's
          # GC; `context` is nil if Context.new itself raised.
          context&.dispose
        end

        # Markdown rendering of the evaluated script as a fenced code block.
        def details
          <<~MD
            ```
            #{script}
            ```
          MD
        end

        private

        # Arguments exposing the script under the :script key
        # (presumably interpolated into the tool's description by the
        # Tool base class -- confirm against the superclass).
        def description_args
          { script: script }
        end
      end
    end
  end
end

View File

@ -0,0 +1,71 @@
# frozen_string_literal: true
RSpec.describe DiscourseAi::AiBot::Tools::JavascriptEvaluator do
  let(:bot_user) { User.find(DiscourseAi::AiBot::EntryPoint::GPT3_5_TURBO_ID) }
  let(:llm) { DiscourseAi::Completions::Llm.proxy("open_ai:gpt-3.5-turbo") }
  let(:progress_blk) { Proc.new {} }

  before { SiteSetting.ai_bot_enabled = true }

  # Builds the tool under test for the given JavaScript source.
  def build_tool(js_source)
    described_class.new({ script: js_source }, bot_user: bot_user, llm: llm)
  end

  # Runs the tool the same way every example does and returns its hash.
  def invoke_tool(tool)
    tool.invoke(&progress_blk)
  end

  describe "#invoke" do
    it "successfully evaluates a simple JavaScript expression" do
      outcome = invoke_tool(build_tool("2 + 2"))

      expect(outcome[:result]).to eq(4)
    end

    it "handles JavaScript execution timeout" do
      # an endless loop combined with a tiny timeout forces termination
      tool = build_tool("while(true){}")
      tool.timeout = 5

      outcome = invoke_tool(tool)

      expect(outcome[:error]).to include("JavaScript execution timed out")
    end

    it "handles JavaScript memory limit exceeded" do
      # keeps growing an array until the memory limit is hit
      tool =
        build_tool("var a = new Array(10000); while(true) { a = a.concat(new Array(10000)) }")
      tool.max_memory = 10_000

      outcome = invoke_tool(tool)

      expect(outcome[:error]).to include("JavaScript execution exceeded memory limit")
    end

    it "returns error for invalid JavaScript syntax" do
      outcome = invoke_tool(build_tool("const x =;"))

      expect(outcome[:error]).to include("JavaScript execution error: ")
    end

    it "truncates long results" do
      # the trailing 'Z' sits at the very end of a 30k+ character string,
      # so it must be gone once the result has been truncated
      outcome = invoke_tool(build_tool("const x = 'zxn'.repeat(10000); x + 'Z';"))

      expect(outcome[:result]).not_to include("Z")
    end

    it "returns result for more complex JavaScript" do
      tool =
        build_tool("const x = [1, 2, 3, 4].map(n => n * 2); x.reduce((a, b) => a + b, 0);")

      outcome = invoke_tool(tool)

      expect(outcome[:result]).to eq(20)
    end
  end
end