discourse-ai/lib/inference/open_ai_image_generator.rb
Sam c34fcc8a95
FEATURE: forum researcher persona for deep research (#1313)
This commit introduces a new Forum Researcher persona specialized in deep forum content analysis along with comprehensive improvements to our AI infrastructure.

Key additions:

    New Forum Researcher persona with advanced filtering and analysis capabilities
    Robust filtering system supporting tags, categories, dates, users, and keywords
    LLM formatter to efficiently process and chunk research results

Infrastructure improvements:

    Implemented CancelManager class to centrally manage AI completion cancellations
    Replaced callback-based cancellation with a more robust pattern
    Added systematic cancellation monitoring with callbacks

Other improvements:

    Added configurable default_enabled flag to control which personas are enabled by default
    Updated translation strings for the new researcher functionality
    Added comprehensive specs for the new components

    Renames the existing Researcher persona to Web Researcher to distinguish it from the new Forum Researcher

This change makes our AI platform more stable while adding powerful research capabilities that can analyze forum trends and surface relevant content.
2025-05-14 12:36:16 +10:00

470 lines
14 KiB
Ruby

# frozen_string_literal: true
module ::DiscourseAi
module Inference
class OpenAiImageGenerator
TIMEOUT = 60
MAX_IMAGE_SIZE = 20_971_520 # 20MB (technically 25 is supported by API)
# Generates one image per prompt (in parallel) and stores each result as a
# Discourse upload.
#
# Prompts are fanned out to the API in concurrent threads; prompts that
# still fail after retries are dropped, and the method only raises when
# every prompt failed (re-raising the first error).
#
# Returns an array of { prompt:, upload: } hashes.
def self.create_uploads!(
  prompts,
  model:,
  size: nil,
  api_key: nil,
  api_url: nil,
  user_id:,
  for_private_message: false,
  n: 1,
  quality: nil,
  style: nil,
  background: nil,
  moderation: "low",
  output_compression: nil,
  output_format: nil,
  title: nil,
  cancel_manager: nil
)
  # Fan the prompts out to the API concurrently.
  responses =
    generate_images_in_threads(
      prompts,
      model: model,
      size: size,
      api_key: api_key,
      api_url: api_url,
      n: n,
      quality: quality,
      style: style,
      background: background,
      moderation: moderation,
      output_compression: output_compression,
      output_format: output_format,
      cancel_manager: cancel_manager,
    )

  # Surface an error only when nothing succeeded; otherwise continue with
  # whichever generations worked.
  raise responses[0] if responses.all? { |resp| resp.is_a?(StandardError) }

  successful = responses.reject { |resp| resp.is_a?(StandardError) }
  create_uploads_from_responses(successful, user_id, for_private_message, title)
end
# Method for image editing that returns Upload objects
# Edits one or more existing images according to `prompt` and stores the
# result as a Discourse upload.
#
# Unlike create_uploads!, this is a single synchronous API call and it
# returns one { prompt:, upload: } hash rather than an array.
def self.create_edited_upload!(
  images,
  prompt,
  model: "gpt-image-1",
  size: "auto",
  api_key: nil,
  api_url: nil,
  user_id:,
  for_private_message: false,
  n: 1,
  quality: nil,
  cancel_manager: nil
)
  response =
    edit_images(
      images,
      prompt,
      model: model,
      size: size,
      api_key: api_key,
      api_url: api_url,
      n: n,
      quality: quality,
      cancel_manager: cancel_manager,
    )

  create_uploads_from_responses([response], user_id, for_private_message).first
end
# Common method to create uploads from API responses
# Converts raw API responses into Discourse uploads.
#
# Each image's base64 payload is written to a temp file and handed to
# UploadCreator. Returns an array of { prompt:, upload: } hashes, where
# :prompt prefers the API's revised_prompt (DALL-E 3) over the prompt
# originally sent.
def self.create_uploads_from_responses(
  api_responses,
  user_id,
  for_private_message,
  title = nil
)
  uploads = []

  api_responses.each do |response|
    next unless response

    response[:data].each_with_index do |image, index|
      # Tempfile.create removes the file once the block exits.
      Tempfile.create("ai_image_#{index}.png") do |file|
        file.binmode
        file.write(Base64.decode64(image[:b64_json]))
        file.rewind

        upload =
          UploadCreator.new(
            file,
            title || "image.png",
            for_private_message: for_private_message,
          ).create_for(user_id)

        uploads << {
          # Use revised_prompt if available (DALL-E 3), otherwise the original
          prompt: image[:revised_prompt] || response[:original_prompt],
          upload: upload,
        }
      end
    end
  end

  uploads
end
# Runs one generation API call per prompt, each in its own thread.
#
# Prompts are capped at 4. Each thread retries its call up to 3 times;
# retries are skipped in the test environment and once the request has
# been cancelled. A thread that still fails returns the exception object
# itself as its value, so a single bad prompt does not abort the batch
# and the caller can distinguish partial success from total failure.
#
# Returns an array (in prompt order) of parsed response hashes and/or
# StandardError instances.
def self.generate_images_in_threads(
  prompts,
  model:,
  size:,
  api_key:,
  api_url:,
  n:,
  quality:,
  style:,
  background:,
  moderation:,
  output_compression:,
  output_format:,
  cancel_manager:
)
  prompts = [prompts] unless prompts.is_a?(Array)
  prompts = prompts.take(4) # Limit to 4 prompts max

  # Use provided values or defaults
  api_key ||= SiteSetting.ai_openai_api_key
  api_url ||= SiteSetting.ai_openai_image_generation_url

  # Thread processing
  threads = []
  prompts.each do |prompt|
    # The prompt is passed as a thread argument so each thread gets its own
    # local copy rather than closing over the loop variable.
    threads << Thread.new(prompt) do |inner_prompt|
      attempts = 0
      begin
        perform_generation_api_call!(
          inner_prompt,
          model: model,
          size: size,
          api_key: api_key,
          api_url: api_url,
          n: n,
          quality: quality,
          style: style,
          background: background,
          moderation: moderation,
          output_compression: output_compression,
          output_format: output_format,
          cancel_manager: cancel_manager,
        )
      rescue => e
        attempts += 1
        # to keep tests speedy
        if !Rails.env.test? && !cancel_manager&.cancelled?
          retry if attempts < 3
        end
        # Cancellation is an expected path; only log genuine failures.
        if !cancel_manager&.cancelled?
          Discourse.warn_exception(
            e,
            message: "Failed to generate image for prompt #{prompt}\n",
          )
          puts "Error generating image for prompt: #{prompt} #{e}" if Rails.env.development?
        end
        # The exception becomes the thread's value (deliberately not re-raised).
        e
      end
    end
  end

  threads.each(&:join)
  # filter_map drops nil/false values; error objects are truthy and kept.
  threads.filter_map(&:value)
end
# Performs a single image-edit API call with bounded retries.
#
# Raises when the model/image-count combination is unsupported, when the
# request was cancelled, or when the call still fails after 3 attempts
# (each retry sleeps 2s; retries are skipped in the test environment).
def self.edit_images(
  images,
  prompt,
  model: "gpt-image-1",
  size: "auto",
  api_key: nil,
  api_url: nil,
  n: 1,
  quality: nil,
  cancel_manager: nil
)
  images = [images] unless images.is_a?(Array)

  # dall-e-2 accepts exactly one image; gpt-image-1 accepts up to 16.
  raise "DALL-E 2 only supports editing one image at a time" if model == "dall-e-2" && images.length > 1
  images = images.take(16) if model == "gpt-image-1" && images.length > 16

  # Fall back to site settings when no explicit credentials were given.
  api_key ||= SiteSetting.ai_openai_api_key
  api_url ||= SiteSetting.ai_openai_image_edit_url

  tries = 0
  begin
    perform_edit_api_call!(
      images,
      prompt,
      model: model,
      size: size,
      api_key: api_key,
      api_url: api_url,
      n: n,
      quality: quality,
      cancel_manager: cancel_manager,
    )
  rescue => e
    # A cancelled request is neither retried nor logged.
    raise e if cancel_manager&.cancelled?

    tries += 1
    if !Rails.env.test?
      sleep 2
      retry if tries < 3
    end

    if Rails.env.development?
      puts "Error editing image(s) with prompt: #{prompt} #{e}"
      p e
    end
    Discourse.warn_exception(e, message: "Failed to edit image(s) with prompt #{prompt}")
    raise e
  end
end
# Image generation API call method
def self.perform_generation_api_call!(
prompt,
model:,
size: nil,
api_key: nil,
api_url: nil,
n: 1,
quality: nil,
style: nil,
background: nil,
moderation: nil,
output_compression: nil,
output_format: nil,
cancel_manager: nil
)
api_key ||= SiteSetting.ai_openai_api_key
api_url ||= SiteSetting.ai_openai_image_generation_url
uri = URI(api_url)
headers = { "Content-Type" => "application/json" }
if uri.host.include?("azure")
headers["api-key"] = api_key
else
headers["Authorization"] = "Bearer #{api_key}"
end
# Build payload based on model type
payload = { model: model, prompt: prompt, n: n }
# Add model-specific parameters
if model == "gpt-image-1"
if size
payload[:size] = size
else
payload[:size] = "auto"
end
payload[:background] = background if background
payload[:moderation] = moderation if moderation
payload[:output_compression] = output_compression if output_compression
payload[:output_format] = output_format if output_format
payload[:quality] = quality if quality
elsif model.start_with?("dall")
payload[:size] = size || "1024x1024"
payload[:quality] = quality || "hd"
payload[:style] = style if style
payload[:response_format] = "b64_json"
end
# Store original prompt for upload metadata
original_prompt = prompt
cancel_manager_callback = nil
FinalDestination::HTTP.start(
uri.host,
uri.port,
use_ssl: uri.scheme == "https",
read_timeout: TIMEOUT,
open_timeout: TIMEOUT,
write_timeout: TIMEOUT,
) do |http|
request = Net::HTTP::Post.new(uri, headers)
request.body = payload.to_json
if cancel_manager
cancel_manager_callback = lambda { http.finish }
cancel_manager.add_callback(cancel_manager_callback)
end
json = nil
http.request(request) do |response|
if response.code.to_i != 200
raise "OpenAI API returned #{response.code} #{response.body}"
else
json = JSON.parse(response.body, symbolize_names: true)
# Add original prompt to response to preserve it
json[:original_prompt] = original_prompt
end
end
json
end
ensure
if cancel_manager && cancel_manager_callback
cancel_manager.remove_callback(cancel_manager_callback)
end
end
# Low-level multipart/form-data call to the image edit endpoint.
#
# `images` must be Upload records; remote uploads are downloaded to temp
# files which are deleted in the ensure clause. Returns the parsed JSON
# response (symbolized keys) with :original_prompt merged in so callers
# can attribute results back to the request. Raises on any non-200
# response or on a non-Upload image.
#
# When a cancel_manager is given, a callback that force-closes the HTTP
# session is registered for the duration of the request; the ensure
# clause always deregisters it.
def self.perform_edit_api_call!(
  images,
  prompt,
  model: "gpt-image-1",
  size: "auto",
  api_key:,
  api_url:,
  n: 1,
  quality: nil,
  cancel_manager: nil
)
  # Initialized before anything can raise: the ensure clause reads it.
  files_to_delete = []

  uri = URI(api_url)

  # Setup for multipart/form-data request
  boundary = SecureRandom.hex
  headers = { "Content-Type" => "multipart/form-data; boundary=#{boundary}" }

  # Azure endpoints authenticate with an api-key header; OpenAI proper
  # uses a Bearer token.
  if uri.host.include?("azure")
    headers["api-key"] = api_key
  else
    headers["Authorization"] = "Bearer #{api_key}"
  end

  # Create multipart form data
  body = []

  # Add model
  body << "--#{boundary}\r\n"
  body << "Content-Disposition: form-data; name=\"model\"\r\n\r\n"
  body << "#{model}\r\n"

  # Add images
  images.each do |image|
    image_data = nil
    image_filename = nil

    # Handle different image input types
    if image.is_a?(Upload)
      image_path =
        if image.local?
          Discourse.store.path_for(image)
        else
          # NOTE(review): MAX_IMAGE_SIZE is a byte count but the parameter
          # is named max_file_size_kb — confirm the intended unit.
          filename =
            Discourse.store.download_safe(image, max_file_size_kb: MAX_IMAGE_SIZE)&.path
          files_to_delete << filename if filename
          filename
        end
      # binread keeps the image payload byte-exact regardless of the
      # process's default external encoding (File.read is text-mode).
      image_data = File.binread(image_path)
      image_filename = File.basename(image.url)
    else
      raise "Unsupported image format. Must be an Upload"
    end

    body << "--#{boundary}\r\n"
    body << "Content-Disposition: form-data; name=\"image[]\"; filename=\"#{image_filename}\"\r\n"
    body << "Content-Type: image/png\r\n\r\n"
    body << image_data
    body << "\r\n"
  end

  # Add prompt
  body << "--#{boundary}\r\n"
  body << "Content-Disposition: form-data; name=\"prompt\"\r\n\r\n"
  body << "#{prompt}\r\n"

  # Add size if provided
  if size
    body << "--#{boundary}\r\n"
    body << "Content-Disposition: form-data; name=\"size\"\r\n\r\n"
    body << "#{size}\r\n"
  end

  # Add n if provided and not the default
  if n != 1
    body << "--#{boundary}\r\n"
    body << "Content-Disposition: form-data; name=\"n\"\r\n\r\n"
    body << "#{n}\r\n"
  end

  # Add quality if provided
  if quality
    body << "--#{boundary}\r\n"
    body << "Content-Disposition: form-data; name=\"quality\"\r\n\r\n"
    body << "#{quality}\r\n"
  end

  # DALL-E models need response_format requested explicitly; default to
  # b64_json for consistency with generation.
  if model.start_with?("dall")
    body << "--#{boundary}\r\n"
    body << "Content-Disposition: form-data; name=\"response_format\"\r\n\r\n"
    body << "b64_json\r\n"
  end

  # End boundary
  body << "--#{boundary}--\r\n"

  # Store original prompt for upload metadata
  original_prompt = prompt

  cancel_manager_callback = nil

  FinalDestination::HTTP.start(
    uri.host,
    uri.port,
    use_ssl: uri.scheme == "https",
    read_timeout: TIMEOUT,
    open_timeout: TIMEOUT,
    write_timeout: TIMEOUT,
  ) do |http|
    # Pass the full URI (not uri.path) so any query string — e.g. Azure's
    # ?api-version=... — is preserved, matching perform_generation_api_call!.
    request = Net::HTTP::Post.new(uri, headers)
    # Force every part to BINARY before joining so UTF-8 text parts and
    # raw image bytes concatenate without Encoding::CompatibilityError.
    request.body = body.map { |part| part.b }.join

    # Register a callback so an external cancellation force-closes this
    # HTTP session, aborting the in-flight request.
    if cancel_manager
      cancel_manager_callback = lambda { http.finish }
      cancel_manager.add_callback(cancel_manager_callback)
    end

    json = nil
    http.request(request) do |response|
      if response.code.to_i != 200
        raise "OpenAI API returned #{response.code} #{response.body}"
      else
        json = JSON.parse(response.body, symbolize_names: true)
        # Add original prompt to response to preserve it
        json[:original_prompt] = original_prompt
      end
    end
    json
  end
ensure
  if cancel_manager && cancel_manager_callback
    cancel_manager.remove_callback(cancel_manager_callback)
  end
  # Remove any temp files created while downloading remote uploads.
  if files_to_delete.present?
    files_to_delete.each { |file| File.delete(file) if File.exist?(file) }
  end
end
end
end
end