# frozen_string_literal: true

# Exercises the WebBrowser AI-bot tool against stubbed HTTP responses:
# a successful fetch, a failing fetch, several page bodies, and a redirect.
#
# NOTE(review): the fixture bodies below are plain text with no markup, although
# several example names refer to HTML, script/style tags and nesting — confirm
# the fixtures have not lost their tags.
RSpec.describe DiscourseAi::AiBot::Tools::WebBrowser do
  let(:bot_user) { User.find(DiscourseAi::AiBot::EntryPoint::GPT3_5_TURBO_ID) }
  let(:llm) { DiscourseAi::Completions::Llm.proxy("open_ai:gpt-4-turbo") }

  before do
    SiteSetting.ai_openai_api_key = "asd"
    SiteSetting.ai_bot_enabled = true
  end

  # Instantiates the tool for +target+ with the shared bot user and LLM,
  # runs it, and hands back whatever #invoke returned.
  def browse(target)
    described_class.new({ url: target }, bot_user: bot_user, llm: llm).invoke
  end

  describe "#invoke" do
    let(:page_url) { "https://arxiv.org/html/2403.17011v1" }

    it "can retrieve the content of a webpage and returns the processed text" do
      # Serve a canned 200 response for the page.
      stub_request(:get, page_url).to_return(
        status: 200,
        body: "Test

This is a simplified version of the webpage content.

",
      )

      outcome = browse(page_url)

      expect(outcome).to have_key(:text)
      expect(outcome[:text]).to eq("This is a simplified version of the webpage content.")
      expect(outcome[:url]).to eq(page_url)
    end

    it "returns an error if the webpage cannot be retrieved" do
      # The stubbed server answers with a 500 status line.
      stub_request(:get, page_url).to_return(status: [500, "Internal Server Error"])

      outcome = browse(page_url)

      expect(outcome).to have_key(:error)
      expect(outcome[:error]).to include("Failed to retrieve the web page")
    end
  end

  describe "#invoke with various HTML structures" do
    let(:url) { "http://example.com" }

    # Stubs a 200 response carrying +body+ at the shared URL, then browses it.
    def browse_page(body)
      stub_request(:get, url).to_return(status: 200, body: body)
      browse(url)
    end

    it "extracts main content from a simple HTML structure" do
      page = "

Simple content.

"

      expect(browse_page(page)[:text]).to eq("Simple content.")
    end

    it "correctly ignores script and style tags" do
      page = "

Only relevant content here.

"

      expect(browse_page(page)[:text]).to eq("Only relevant content here.")
    end

    it "extracts content from nested structures" do
      page = "

Nested paragraph 1.

Nested paragraph 2.

"

      # The two paragraphs are expected to be joined by a single space.
      expect(browse_page(page)[:text]).to eq("Nested paragraph 1. Nested paragraph 2.")
    end
  end

  describe "#invoke with redirects" do
    let(:initial_url) { "http://initial-example.com" }
    let(:final_url) { "http://final-example.com" }
    let(:redirect_html) { "

Redirected content.

" }

    it "follows redirects and retrieves content from the final destination" do
      # First hop answers 302 pointing at the final URL; the final URL serves the page.
      stub_request(:get, initial_url).to_return(status: 302, headers: { "Location" => final_url })
      stub_request(:get, final_url).to_return(status: 200, body: redirect_html)

      outcome = browse(initial_url)

      # The reported URL is the redirect target, not the URL that was requested.
      expect(outcome[:url]).to eq(final_url)
      expect(outcome[:text]).to eq("Redirected content.")
    end
  end
end