From 232f12eba60ea79b45244aa5a9d6eb3f52dab8b4 Mon Sep 17 00:00:00 2001 From: Sam Date: Tue, 21 May 2024 07:57:01 +1000 Subject: [PATCH] FEATURE: JavaScript evaluation tool (#630) This is similar to code interpreter by ChatGPT, except that it uses JavaScript as the execution engine. Safeguards were added to ensure memory is constrained and evaluation times out. --- config/locales/server.en.yml | 2 + lib/ai_bot/personas/persona.rb | 1 + lib/ai_bot/tools/javascript_evaluator.rb | 106 ++++++++++++++++++ .../ai_bot/tools/javascript_evaluator_spec.rb | 71 ++++++++++++ 4 files changed, 180 insertions(+) create mode 100644 lib/ai_bot/tools/javascript_evaluator.rb create mode 100644 spec/lib/modules/ai_bot/tools/javascript_evaluator_spec.rb diff --git a/config/locales/server.en.yml b/config/locales/server.en.yml index 689b9db7..13dc3be4 100644 --- a/config/locales/server.en.yml +++ b/config/locales/server.en.yml @@ -234,6 +234,7 @@ en: search_settings: "Searching site settings" dall_e: "Generate image" search_meta_discourse: "Search Meta Discourse" + javascript_evaluator: "Evaluate JavaScript" command_help: web_browser: "Browse web page using the AI Bot" github_search_code: "Search for code in a GitHub repository" @@ -253,6 +254,7 @@ en: search_settings: "Search site settings" dall_e: "Generate image using DALL-E 3" search_meta_discourse: "Search Meta Discourse" + javascript_evaluator: "Evaluate JavaScript" command_description: web_browser: "Reading %{url}" github_search_code: "Searched for '%{query}' in %{repo}" diff --git a/lib/ai_bot/personas/persona.rb b/lib/ai_bot/personas/persona.rb index b7f73f29..6720da7f 100644 --- a/lib/ai_bot/personas/persona.rb +++ b/lib/ai_bot/personas/persona.rb @@ -88,6 +88,7 @@ module DiscourseAi Tools::GithubFileContent, Tools::GithubPullRequestDiff, Tools::WebBrowser, + Tools::JavascriptEvaluator, ] tools << Tools::GithubSearchCode if SiteSetting.ai_bot_github_access_token.present? 
diff --git a/lib/ai_bot/tools/javascript_evaluator.rb b/lib/ai_bot/tools/javascript_evaluator.rb
new file mode 100644
index 00000000..77aadd04
--- /dev/null
+++ b/lib/ai_bot/tools/javascript_evaluator.rb
@@ -0,0 +1,122 @@
+# frozen_string_literal: true
+
+require "mini_racer"
+require "json"
+
+module DiscourseAi
+  module AiBot
+    module Tools
+      # AI bot tool that evaluates a JavaScript snippet in a fresh MiniRacer
+      # context, bounded by an execution timeout and a memory limit.
+      class JavascriptEvaluator < Tool
+        # Default timeout: for MiniRacer::Context (milliseconds).
+        TIMEOUT = 500
+        # Default max_memory: for MiniRacer::Context (bytes).
+        MAX_MEMORY = 10_000_000
+        # marshal_stack_depth: passed to MiniRacer::Context.
+        MARSHAL_STACK_DEPTH = 20
+
+        # Per-instance overrides for the default limits; the specs use them
+        # to force the timeout and out-of-memory paths.
+        attr_writer :timeout, :max_memory
+
+        # Tool definition: one required string parameter, "script".
+        def self.signature
+          {
+            name: name,
+            description: "Evaluates JavaScript code using MiniRacer",
+            parameters: [
+              {
+                name: "script",
+                description: "The JavaScript code to evaluate",
+                type: "string",
+                required: true,
+              },
+            ],
+          }
+        end
+
+        def self.name
+          "javascript_evaluator"
+        end
+
+        # The JavaScript source to run; a missing parameter becomes "".
+        def script
+          parameters[:script].to_s
+        end
+
+        # Effective timeout: the per-instance override, else TIMEOUT.
+        def timeout
+          @timeout || TIMEOUT
+        end
+
+        # Effective memory limit: the per-instance override, else MAX_MEMORY.
+        def max_memory
+          @max_memory || MAX_MEMORY
+        end
+
+        # Evaluates #script and returns { result: value } on success, or
+        # { error: message } when MiniRacer reports a timeout, an
+        # out-of-memory condition, or any other MiniRacer::Error.
+        def invoke
+          context =
+            MiniRacer::Context.new(
+              timeout: timeout,
+              # Use the accessor rather than MAX_MEMORY so that #max_memory=
+              # overrides take effect.
+              max_memory: max_memory,
+              marshal_stack_depth: MARSHAL_STACK_DEPTH,
+            )
+
+          # works around llms like anthropic loving console.log
+          eval_script = <<~JS
+            let console = {};
+            console.log = function(val) {
+              return val;
+            };
+
+            #{script}
+          JS
+
+          result = context.eval(eval_script)
+
+          # only do special handling and truncating for long strings
+          if result.to_s.length > 1000
+            # NOTE(review): truncate is not defined in this class; presumably
+            # inherited from Tool.
+            result = truncate(result.to_s, max_length: 10_000, percent_length: 0.3, llm: llm)
+          end
+
+          { result: result }
+        rescue MiniRacer::ScriptTerminatedError => e
+          { error: "JavaScript execution timed out: #{e.message}" }
+        rescue MiniRacer::V8OutOfMemoryError => e
+          { error: "JavaScript execution exceeded memory limit: #{e.message}" }
+        rescue MiniRacer::Error => e
+          { error: "JavaScript execution error: #{e.message}" }
+        ensure
+          # Release the V8 isolate right away instead of waiting for Ruby's GC.
+          context&.dispose
+        end
+
+        # Markdown shown for this invocation: the script in a fenced block.
+        def details
+          <<~MD
+
+
+            ```
+            #{script}
+            ```
+
+          MD
+        end
+
+        private
+
+        def description_args
+          { script: script }
+        end
+      end
+    end
+  end
+end
diff --git a/spec/lib/modules/ai_bot/tools/javascript_evaluator_spec.rb b/spec/lib/modules/ai_bot/tools/javascript_evaluator_spec.rb
new file mode 100644
index 00000000..bd8e7543
--- /dev/null
+++ b/spec/lib/modules/ai_bot/tools/javascript_evaluator_spec.rb
@@ -0,0 +1,71 @@
+# frozen_string_literal: true
+
+RSpec.describe DiscourseAi::AiBot::Tools::JavascriptEvaluator do
+  let(:bot_user) { User.find(DiscourseAi::AiBot::EntryPoint::GPT3_5_TURBO_ID) }
+  let(:llm) { DiscourseAi::Completions::Llm.proxy("open_ai:gpt-3.5-turbo") }
+  let(:progress_blk) { Proc.new {} }
+
+  before { SiteSetting.ai_bot_enabled = true }
+
+  describe "#invoke" do
+    it "successfully evaluates a simple JavaScript expression" do
+      evaluator = described_class.new({ script: "2 + 2" }, bot_user: bot_user, llm: llm)
+
+      result = evaluator.invoke(&progress_blk)
+      expect(result[:result]).to eq(4)
+    end
+
+    it "handles JavaScript execution timeout" do
+      evaluator = described_class.new({ script: "while(true){}" }, bot_user: bot_user, llm: llm)
+
+      evaluator.timeout = 5
+
+      result = evaluator.invoke(&progress_blk)
+      expect(result[:error]).to include("JavaScript execution timed out")
+    end
+
+    it "handles JavaScript memory limit exceeded" do
+      evaluator =
+        described_class.new(
+          { script: "var a = new Array(10000); while(true) { a = a.concat(new Array(10000)) }" },
+          bot_user: bot_user,
+          llm: llm,
+        )
+
+      evaluator.max_memory = 10_000
+      result = evaluator.invoke(&progress_blk)
+      expect(result[:error]).to include("JavaScript execution exceeded memory limit")
+    end
+
+    it "returns error for invalid JavaScript syntax" do
+      evaluator = described_class.new({ script: "const x =;" }, bot_user: bot_user, llm: llm)
+
+      result = evaluator.invoke(&progress_blk)
+      expect(result[:error]).to include("JavaScript execution error: ")
+    end
+
+    it "truncates long results" do
+      evaluator =
+        described_class.new(
+          { script: "const x = 'zxn'.repeat(10000); x + 'Z';" },
+          bot_user: bot_user,
+          llm: llm,
+        )
+
+      result = evaluator.invoke(&progress_blk)
+      expect(result[:result]).not_to include("Z")
+    end
+
+    it "returns result for more complex JavaScript" do
+      evaluator =
+        described_class.new(
+          { script: "const x = [1, 2, 3, 4].map(n => n * 2); x.reduce((a, b) => a + b, 0);" },
+          bot_user: bot_user,
+          llm: llm,
+        )
+
+      result = evaluator.invoke(&progress_blk)
+      expect(result[:result]).to eq(20)
+    end
+  end
+end