FEATURE: JavaScript evaluation tool (#630)
This is similar to ChatGPT's Code Interpreter, except that it uses JavaScript as the execution engine. Safeguards were added to cap memory usage and to abort evaluations that run past a timeout.
This commit is contained in:
parent
4d8d822351
commit
232f12eba6
|
@ -234,6 +234,7 @@ en:
|
|||
search_settings: "Searching site settings"
|
||||
dall_e: "Generate image"
|
||||
search_meta_discourse: "Search Meta Discourse"
|
||||
javascript_evaluator: "Evaluate JavaScript"
|
||||
command_help:
|
||||
web_browser: "Browse web page using the AI Bot"
|
||||
github_search_code: "Search for code in a GitHub repository"
|
||||
|
@ -253,6 +254,7 @@ en:
|
|||
search_settings: "Search site settings"
|
||||
dall_e: "Generate image using DALL-E 3"
|
||||
search_meta_discourse: "Search Meta Discourse"
|
||||
javascript_evaluator: "Evaluate JavaScript"
|
||||
command_description:
|
||||
web_browser: "Reading <a href='%{url}'>%{url}</a>"
|
||||
github_search_code: "Searched for '%{query}' in %{repo}"
|
||||
|
|
|
@ -88,6 +88,7 @@ module DiscourseAi
|
|||
Tools::GithubFileContent,
|
||||
Tools::GithubPullRequestDiff,
|
||||
Tools::WebBrowser,
|
||||
Tools::JavascriptEvaluator,
|
||||
]
|
||||
|
||||
tools << Tools::GithubSearchCode if SiteSetting.ai_bot_github_access_token.present?
|
||||
|
|
|
@ -0,0 +1,106 @@
|
|||
# frozen_string_literal: true
|
||||
|
||||
require "mini_racer"
|
||||
require "json"
|
||||
|
||||
module DiscourseAi
  module AiBot
    module Tools
      # Bot tool that evaluates a snippet of JavaScript inside a MiniRacer
      # context constrained by a timeout and a memory ceiling.
      #
      # The timeout and memory ceiling default to TIMEOUT / MAX_MEMORY and can
      # be overridden per instance through `timeout=` / `max_memory=`.
      class JavascriptEvaluator < Tool
        # Default `timeout:` option for MiniRacer::Context (milliseconds in mini_racer).
        TIMEOUT = 500
        # Default `max_memory:` option for MiniRacer::Context (bytes in mini_racer).
        MAX_MEMORY = 10_000_000
        # `marshal_stack_depth:` option for MiniRacer::Context.
        MARSHAL_STACK_DEPTH = 20

        # Per-instance overrides; when unset the readers fall back to the
        # class-level defaults.
        attr_writer :timeout, :max_memory

        # Tool definition: name, description and the single required
        # `script` string parameter.
        def self.signature
          {
            name: name,
            description: "Evaluates JavaScript code using MiniRacer",
            parameters: [
              {
                name: "script",
                description: "The JavaScript code to evaluate",
                type: "string",
                required: true,
              },
            ],
          }
        end

        # Identifier of this tool.
        def self.name
          "javascript_evaluator"
        end

        # The JavaScript source to run, always as a String.
        def script
          parameters[:script].to_s
        end

        # Effective evaluation timeout (instance override or TIMEOUT).
        def timeout
          @timeout || TIMEOUT
        end

        # Effective memory ceiling (instance override or MAX_MEMORY).
        def max_memory
          @max_memory || MAX_MEMORY
        end

        # Runs the script and reports the outcome.
        #
        # @return [Hash] `{ result: value }` on success, where long values are
        #   stringified and truncated; `{ error: message }` when MiniRacer
        #   reports termination, out-of-memory, or any other MiniRacer::Error.
        def invoke
          context =
            MiniRacer::Context.new(
              timeout: timeout,
              # Use the accessor (not the constant) so that a per-instance
              # `max_memory=` override is actually honoured.
              max_memory: max_memory,
              marshal_stack_depth: MARSHAL_STACK_DEPTH,
            )

          # works around llms like anthropic loving console.log
          eval_script = <<~JS
            let console = {};
            console.log = function(val) {
              return val;
            };

            #{script}
          JS

          result = context.eval(eval_script)

          # only do special handling and truncating for long strings
          if result.to_s.length > 1000
            result = truncate(result.to_s, max_length: 10_000, percent_length: 0.3, llm: llm)
          end

          { result: result }
        rescue MiniRacer::ScriptTerminatedError => e
          { error: "JavaScript execution timed out: #{e.message}" }
        rescue MiniRacer::V8OutOfMemoryError => e
          { error: "JavaScript execution exceeded memory limit: #{e.message}" }
        rescue MiniRacer::Error => e
          { error: "JavaScript execution error: #{e.message}" }
        ensure
          # Release the V8 context right away instead of waiting for Ruby's GC;
          # `context` is nil if construction itself failed.
          context&.dispose
        end

        # Markdown shown for this invocation: the script inside a fenced code
        # block.
        # NOTE(review): the blank lines around the fence are reproduced as they
        # appear in the viewed source; confirm no markup was lost there.
        def details
          <<~MD


            ```
            #{script}
            ```

          MD
        end

        private

        # Arguments exposed for the tool's description string.
        def description_args
          { script: script }
        end
      end
    end
  end
end
|
|
@ -0,0 +1,71 @@
|
|||
# frozen_string_literal: true
|
||||
|
||||
# Specs for the JavaScript evaluation tool: plain evaluation, the timeout and
# memory safeguards, syntax errors, and truncation of oversized results.
RSpec.describe DiscourseAi::AiBot::Tools::JavascriptEvaluator do
  let(:bot_user) { User.find(DiscourseAi::AiBot::EntryPoint::GPT3_5_TURBO_ID) }
  let(:llm) { DiscourseAi::Completions::Llm.proxy("open_ai:gpt-3.5-turbo") }
  let(:progress_blk) { Proc.new {} }

  before { SiteSetting.ai_bot_enabled = true }

  # Builds a tool instance for the given JavaScript source.
  def evaluator_for(source)
    described_class.new({ script: source }, bot_user: bot_user, llm: llm)
  end

  describe "#invoke" do
    it "successfully evaluates a simple JavaScript expression" do
      outcome = evaluator_for("2 + 2").invoke(&progress_blk)

      expect(outcome[:result]).to eq(4)
    end

    it "handles JavaScript execution timeout" do
      tool = evaluator_for("while(true){}")
      tool.timeout = 5

      outcome = tool.invoke(&progress_blk)

      expect(outcome[:error]).to include("JavaScript execution timed out")
    end

    it "handles JavaScript memory limit exceeded" do
      tool =
        evaluator_for("var a = new Array(10000); while(true) { a = a.concat(new Array(10000)) }")
      tool.max_memory = 10_000

      outcome = tool.invoke(&progress_blk)

      expect(outcome[:error]).to include("JavaScript execution exceeded memory limit")
    end

    it "returns error for invalid JavaScript syntax" do
      outcome = evaluator_for("const x =;").invoke(&progress_blk)

      expect(outcome[:error]).to include("JavaScript execution error: ")
    end

    it "truncates long results" do
      # The trailing "Z" only survives if the result is returned untruncated.
      outcome = evaluator_for("const x = 'zxn'.repeat(10000); x + 'Z';").invoke(&progress_blk)

      expect(outcome[:result]).not_to include("Z")
    end

    it "returns result for more complex JavaScript" do
      source = "const x = [1, 2, 3, 4].map(n => n * 2); x.reduce((a, b) => a + b, 0);"

      outcome = evaluator_for(source).invoke(&progress_blk)

      expect(outcome[:result]).to eq(20)
    end
  end
end
|
Loading…
Reference in New Issue