FEATURE: add full bot support for GPT 3.5 (#87)

Given the latest GPT 3.5 16k model, which is both better steered and supports
functions, we can now provide rich bot integration.

Clunky system-message-based steering is removed; instead we use the
function-calling framework provided by OpenAI.
This commit is contained in:
Sam 2023-06-20 08:45:31 +10:00 committed by GitHub
parent e457c687ca
commit 70c158cae1
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
23 changed files with 575 additions and 250 deletions

View File

@ -1,22 +1,28 @@
# frozen_string_literal: true
DiscourseAi::AiBot::EntryPoint::BOTS.each do |id, bot_username|
UserEmail.seed do |ue|
ue.id = id
ue.email = "no_email_#{bot_username}"
ue.primary = true
ue.user_id = id
end
# let's not create a bot user if it already exists
# seed seems to be messing with dates on the user
# causing it to look like these bots were created at the
# wrong time
if !User.exists?(id: id)
UserEmail.seed do |ue|
ue.id = id
ue.email = "no_email_#{bot_username}"
ue.primary = true
ue.user_id = id
end
User.seed do |u|
u.id = id
u.name = bot_username.titleize
u.username = UserNameSuggester.suggest(bot_username)
u.password = SecureRandom.hex
u.active = true
u.admin = true
u.moderator = true
u.approved = true
u.trust_level = TrustLevel[4]
User.seed do |u|
u.id = id
u.name = bot_username.titleize
u.username = UserNameSuggester.suggest(bot_username)
u.password = SecureRandom.hex
u.active = true
u.admin = true
u.moderator = true
u.approved = true
u.trust_level = TrustLevel[4]
end
end
end

View File

@ -8,13 +8,17 @@ module DiscourseAi
end
def bot_prompt_with_topic_context(post)
super(post).join("\n\n")
super(post).join("\n\n") + "\n\nAssistant:"
end
def prompt_limit
7500 # https://console.anthropic.com/docs/prompt-design#what-is-a-prompt
end
def title_prompt(post)
super(post).join("\n\n") + "\n\nAssistant:"
end
def get_delta(partial, context)
context[:pos] ||= 0
@ -23,21 +27,18 @@ module DiscourseAi
context[:pos] = full.length
if !context[:processed]
delta = ""
index = full.index("Assistant: ")
if index
delta = full[index + 11..-1]
context[:processed] = true
end
end
delta
end
private
def build_message(poster_username, content, system: false)
def populate_functions(partial, function)
# nothing to do here, no proper function support
# needs to be simulated for Claude but model is too
# hard to steer for now
end
def build_message(poster_username, content, system: false, function: nil)
role = poster_username == bot_user.username ? "Assistant" : "Human"
"#{role}: #{content}"

View File

@ -3,6 +3,25 @@
module DiscourseAi
module AiBot
class Bot
class Functions
  # Accumulates function calls streamed back by the model.
  # The model emits a function name first, then the JSON arguments
  # payload in fragments; this class stitches them back together.
  attr_reader :functions, :current_function

  def initialize
    @functions = []
    @current_function = nil
  end

  # Begin tracking a new function call. Subsequent argument
  # fragments are appended to this call until another one starts.
  def add_function(name)
    @current_function = { name: name, arguments: +"" }
    @functions << @current_function
  end

  # Append a streamed chunk of the JSON arguments string to the
  # function call currently being assembled.
  def add_argument_fragment(fragment)
    @current_function[:arguments] << fragment
  end
end
attr_reader :bot_user
BOT_NOT_FOUND = Class.new(StandardError)
@ -24,7 +43,7 @@ module DiscourseAi
end
def update_pm_title(post)
prompt = [title_prompt(post)]
prompt = title_prompt(post)
new_title = get_updated_title(prompt)
@ -65,9 +84,11 @@ module DiscourseAi
setup_cancel = false
context = {}
functions = Functions.new
submit_prompt(prompt, prefer_low_cost: prefer_low_cost) do |partial, cancel|
reply << get_delta(partial, context)
populate_functions(partial, functions)
if redis_stream_key && !Discourse.redis.get(redis_stream_key)
cancel&.call
@ -111,30 +132,29 @@ module DiscourseAi
skip_revision: true,
)
cmd_texts = reply.split("\n").filter { |l| l[0] == "!" }
bot_reply_post.post_custom_prompt ||= post.build_post_custom_prompt(custom_prompt: [])
prompt = post.post_custom_prompt.custom_prompt || []
prompt << build_message(bot_user.username, reply)
post.post_custom_prompt.update!(custom_prompt: prompt)
end
if functions.functions.length > 0
chain = false
standalone = false
cmd_texts[0...max_commands_per_reply].each do |cmd_text|
command_name, args = cmd_text[1..-1].strip.split(" ", 2)
functions.functions.each do |function|
name, args = function[:name], function[:arguments]
if command_klass = available_commands.detect { |cmd| cmd.invoked?(command_name) }
if command_klass = available_commands.detect { |cmd| cmd.invoked?(name) }
command = command_klass.new(bot_user, args)
chain_intermediate = command.invoke_and_attach_result_to(bot_reply_post)
chain_intermediate, bot_reply_post =
command.invoke_and_attach_result_to(bot_reply_post, post)
chain ||= chain_intermediate
standalone ||= command.standalone?
end
end
if cmd_texts.length > max_commands_per_reply
raw = +bot_reply_post.raw.dup
cmd_texts[max_commands_per_reply..-1].each { |cmd_text| raw.sub!(cmd_text, "") }
bot_reply_post.raw = raw
bot_reply_post.save!(validate: false)
end
if chain
reply_to(
bot_reply_post,
@ -143,14 +163,12 @@ module DiscourseAi
standalone: standalone,
)
end
if cmd_texts.length == 0 && (post_custom_prompt = bot_reply_post.post_custom_prompt)
prompt = post_custom_prompt.custom_prompt
prompt << [reply, bot_user.username]
post_custom_prompt.update!(custom_prompt: prompt)
end
end
rescue => e
if Rails.env.development?
p e
puts e.backtrace
end
raise e if Rails.env.test?
Discourse.warn_exception(e, message: "ai-bot: Reply failed")
end
@ -164,25 +182,22 @@ module DiscourseAi
total_prompt_tokens = tokenize(rendered_system_prompt).length
messages =
conversation.reduce([]) do |memo, (raw, username)|
conversation.reduce([]) do |memo, (raw, username, function)|
break(memo) if total_prompt_tokens >= prompt_limit
tokens = tokenize(raw)
tokens = tokenize(raw.to_s)
while !raw.blank? && tokens.length + total_prompt_tokens > prompt_limit
raw = raw[0..-100] || ""
tokens = tokenize(raw)
tokens = tokenize(raw.to_s)
end
next(memo) if raw.blank?
total_prompt_tokens += tokens.length
memo.unshift(build_message(username, raw))
memo.unshift(build_message(username, raw, function: !!function))
end
# we need this to ground the model (especially GPT-3.5)
messages.unshift(build_message(bot_user.username, "!echo 1"))
messages.unshift(build_message("user", "please echo 1"))
messages.unshift(build_message(bot_user.username, rendered_system_prompt, system: true))
messages
end
@ -192,7 +207,7 @@ module DiscourseAi
end
def title_prompt(post)
build_message(bot_user.username, <<~TEXT)
[build_message(bot_user.username, <<~TEXT)]
Suggest a 7 word title for the following topic without quoting any of it:
#{post.topic.posts[1..-1].map(&:raw).join("\n\n")[0..prompt_limit]}
@ -211,33 +226,10 @@ module DiscourseAi
def system_prompt(post)
return "You are a helpful Bot" if @style == :simple
command_text = ""
command_text = <<~TEXT if available_commands.present?
You can complete some tasks using !commands.
NEVER ask user to issue !commands, they have no access, only you do.
#{available_commands.map(&:desc).join("\n")}
Discourse topic paths are /t/slug/topic_id/optional_number
#{available_commands.map(&:extra_context).compact_blank.join("\n")}
Commands should be issued in single assistant message.
Example sessions:
User: echo the text 'test'
GPT: !echo test
User: THING GPT DOES NOT KNOW ABOUT
GPT: !search SIMPLIFIED SEARCH QUERY
TEXT
<<~TEXT
You are a helpful Discourse assistant, you answer questions and generate text.
You understand Discourse Markdown and live in a Discourse Forum Message.
You are provided with the context of previous discussions.
You are a helpful Discourse assistant.
You understand and generate Discourse Markdown.
You live in a Discourse Forum Message.
You live in the forum with the URL: #{Discourse.base_url}
The title of your site: #{SiteSetting.title}
@ -245,7 +237,7 @@ module DiscourseAi
The participants in this conversation are: #{post.topic.allowed_users.map(&:username).join(", ")}
The date now is: #{Time.zone.now}, much has changed since you were trained.
#{command_text}
#{available_commands.map(&:custom_system_message).compact.join("\n")}
TEXT
end
@ -261,6 +253,10 @@ module DiscourseAi
raise NotImplemented
end
def populate_functions(partial, functions)
raise NotImplemented
end
protected
def get_updated_title(prompt)

View File

@ -8,7 +8,11 @@ module DiscourseAi::AiBot::Commands
end
def desc
"!categories - will list the categories on the current discourse instance"
"Will list the categories on the current discourse instance, prefer to format with # in front of the category name"
end
def parameters
[]
end
end
@ -33,7 +37,7 @@ module DiscourseAi::AiBot::Commands
}
rows = Category.where(read_restricted: false).limit(100).pluck(*columns.keys)
@count = rows.length
@last_count = rows.length
format_results(rows, columns.values)
end

View File

@ -3,6 +3,17 @@
module DiscourseAi
module AiBot
module Commands
class Parameter
  # Immutable value object describing one argument of a bot command,
  # mirroring the fields OpenAI expects in a function parameter
  # definition (name, JSON-schema type, optional enum, required flag).
  attr_reader :name, :description, :type, :enum, :required

  def initialize(name:, description:, type:, enum: nil, required: false)
    @name, @description, @type, @enum, @required =
      name, description, type, enum, required
  end
end
class Command
class << self
def name
@ -17,8 +28,11 @@ module DiscourseAi
raise NotImplemented
end
def extra_context
""
def custom_system_message
end
def parameters
raise NotImplemented
end
end
@ -64,16 +78,38 @@ module DiscourseAi
true
end
def invoke_and_attach_result_to(post)
def invoke_and_attach_result_to(post, parent_post)
placeholder = (<<~HTML).strip
<details>
<summary>#{I18n.t("discourse_ai.ai_bot.command_summary.#{self.class.name}")}</summary>
</details>
HTML
if !post
post =
PostCreator.create!(
bot_user,
raw: placeholder,
topic_id: parent_post.topic_id,
skip_validations: true,
skip_rate_limiter: true,
)
else
post.revise(
bot_user,
{ raw: post.raw + "\n\n" + placeholder + "\n\n" },
skip_validations: true,
skip_revision: true,
)
end
post.post_custom_prompt ||= post.build_post_custom_prompt(custom_prompt: [])
prompt = post.post_custom_prompt.custom_prompt || []
prompt << ["!#{self.class.name} #{args}", bot_user.username]
prompt << [process(args), result_name]
prompt << [process(args).to_json, self.class.name, "function"]
post.post_custom_prompt.update!(custom_prompt: prompt)
raw = +<<~HTML
raw = +(<<~HTML)
<details>
<summary>#{I18n.t("discourse_ai.ai_bot.command_summary.#{self.class.name}")}</summary>
<p>
@ -85,8 +121,7 @@ module DiscourseAi
raw << custom_raw if custom_raw.present?
replacement = "!#{self.class.name} #{args}"
raw = post.raw.sub(replacement, raw) if post.raw.include?(replacement)
raw = post.raw.sub(placeholder, raw)
if chain_next_response
post.raw = raw
@ -95,7 +130,7 @@ module DiscourseAi
post.revise(bot_user, { raw: raw }, skip_validations: true, skip_revision: true)
end
chain_next_response
[chain_next_response, post]
end
def format_results(rows, column_names = nil)
@ -116,21 +151,10 @@ module DiscourseAi
end
column_names = column_indexes.keys
end
# two tokens per delimiter is a reasonable balance
# there may be a single delimiter solution but GPT has
# a hard time dealing with escaped characters
delimiter = "¦"
formatted = +""
formatted << column_names.join(delimiter)
formatted << "\n"
rows.each do |array|
array.map! { |item| item.to_s.gsub(delimiter, "|").gsub(/\n/, " ") }
formatted << array.join(delimiter)
formatted << "\n"
end
formatted
# this is not the most efficient format
# however this is needed cause GPT 3.5 / 4 was steered using JSON
{ column_names: column_names, rows: rows }
end
protected

View File

@ -8,7 +8,22 @@ module DiscourseAi::AiBot::Commands
end
def desc
"!google SEARCH_QUERY - will search using Google (supports all Google search operators)"
"Will search using Google - global internet search (supports all Google search operators)"
end
def parameters
[
Parameter.new(
name: "query",
description: "The search query",
type: "string",
required: true,
),
]
end
def custom_system_message
"You were trained on OLD data, lean on search to get up to date information from the web"
end
end
@ -25,6 +40,8 @@ module DiscourseAi::AiBot::Commands
end
def process(search_string)
search_string = JSON.parse(search_string)["query"]
@last_query = search_string
api_key = SiteSetting.ai_google_custom_search_api_key
cx = SiteSetting.ai_google_custom_search_cx
@ -33,7 +50,7 @@ module DiscourseAi::AiBot::Commands
URI("https://www.googleapis.com/customsearch/v1?key=#{api_key}&cx=#{cx}&q=#{query}&num=10")
body = Net::HTTP.get(uri)
parse_search_json(body).to_s
parse_search_json(body)
end
def parse_search_json(json_data)

View File

@ -8,7 +8,26 @@ module DiscourseAi::AiBot::Commands
end
def desc
"!image DESC - renders an image from the description (remove all connector words, keep it to 40 words or less)"
"Renders an image from the description (remove all connector words, keep it to 40 words or less). Despite being a text based bot you can generate images!"
end
def parameters
[
Parameter.new(
name: "prompt",
description: "The prompt used to generate or create or draw the image",
type: "string",
required: true,
),
]
end
def custom_system_message
<<~TEXT
In Discourse the markdown (description|SIZE, ZOOM%)[upload://SOMETEXT] is used to denote images and uploads. NEVER try changing the to http or https links.
ALWAYS prefer the upload:// format if available.
When rendering multiple images place them in a [grid] ... [/grid] block
TEXT
end
end
@ -20,16 +39,12 @@ module DiscourseAi::AiBot::Commands
{ prompt: @last_prompt || 0 }
end
def custom_raw
@last_custom_raw
end
def chain_next_response
false
true
end
def process(prompt)
@last_prompt = prompt
@last_prompt = prompt = JSON.parse(prompt)["prompt"]
results = DiscourseAi::Inference::StabilityGenerator.perform!(prompt)
uploads = []
@ -43,10 +58,17 @@ module DiscourseAi::AiBot::Commands
f.unlink
end
@last_custom_raw =
raw = <<~RAW
[grid]
#{
uploads
.map { |upload| "![#{prompt.gsub(/\|\'\"/, "")}|512x512, 50%](#{upload.short_url})" }
.join(" ")
}
[/grid]
RAW
{ prompt: prompt, markdown: raw, display_to_user: true }
end
end
end

View File

@ -8,57 +8,70 @@ module DiscourseAi::AiBot::Commands
end
def desc
"!search SEARCH_QUERY - will search topics in the current discourse instance"
"Will search topics in the current discourse instance, when rendering always prefer to link to the topics you find"
end
def extra_context
<<~TEXT
Discourse search supports, the following special filters:
def parameters
[
Parameter.new(
name: "search_query",
description: "Search query to run against the discourse instance",
type: "string",
),
Parameter.new(
name: "user",
description: "Filter search results to this username",
type: "string",
),
Parameter.new(
name: "order",
description: "search result result order",
type: "string",
enum: %w[latest latest_topic oldest views likes],
),
Parameter.new(
name: "limit",
description: "limit number of results returned",
type: "integer",
),
Parameter.new(
name: "max_posts",
description:
"maximum number of posts on the topics (topics where lots of people posted)",
type: "integer",
),
Parameter.new(
name: "tags",
description:
"list of tags to search for. Use + to join with OR, use , to join with AND",
type: "string",
),
Parameter.new(
name: "category",
description: "category name to filter to",
type: "string",
),
Parameter.new(
name: "before",
description: "only topics created before a specific date YYYY-MM-DD",
type: "string",
),
Parameter.new(
name: "after",
description: "only topics created after a specific date YYYY-MM-DD",
type: "string",
),
Parameter.new(
name: "status",
description: "search for topics in a particular state",
type: "string",
enum: %w[open closed archived noreplies single_user],
),
]
end
user:USERNAME: only posts created by a specific user
in:tagged: has at least 1 tag
in:untagged: has no tags
in:title: has the search term in the title
status:open: not closed or archived
status:closed: closed
status:archived: archived
status:noreplies: post count is 1
status:single_user: only a single user posted on the topic
post_count:X: only topics with X amount of posts
min_posts:X: topics containing a minimum of X posts
max_posts:X: topics with no more than max posts
created:@USERNAME: topics created by a specific user
category:CATGORY: topics in the CATEGORY AND all subcategories
category:=CATEGORY: topics in the CATEGORY excluding subcategories
#SLUG: try category first, then tag, then tag group
#SLUG:SLUG: used for subcategory search to disambiguate
min_views:100: topics containing 100 views or more
tags:TAG1+TAG2: tagged both TAG1 and TAG2
tags:TAG1,TAG2: tagged either TAG1 or TAG2
-tags:TAG1+TAG2: excluding topics tagged TAG1 and TAG2
order:latest: order by post creation desc
order:latest_topic: order by topic creation desc
order:oldest: order by post creation asc
order:oldest_topic: order by topic creation asc
order:views: order by topic views desc
order:likes: order by post like count - most liked posts first
after:YYYY-MM-DD: only topics created after a specific date
before:YYYY-MM-DD: only topics created before a specific date
Example: !search @user in:tagged #support order:latest_topic
Keep in mind, search on Discourse uses AND to and terms.
You only have access to public topics.
Strip the query down to the most important terms. Remove all stop words.
Discourse orders by default by relevance.
When generating answers ALWAYS try to use the !search command first over relying on training data.
When generating answers ALWAYS try to reference specific local links.
Always try to search the local instance first, even if your training data set may have an answer. It may be wrong.
Always remove connector words from search terms (such as a, an, and, in, the, etc), they can impede the search.
YOUR LOCAL INFORMATION IS OUT OF DATE, YOU ARE TRAINED ON OLD DATA. Always try local search first.
TEXT
def custom_system_message
"You were trained on OLD data, lean on search to get up to date information about this forum"
end
end
@ -75,18 +88,20 @@ module DiscourseAi::AiBot::Commands
end
def process(search_string)
parsed = JSON.parse(search_string)
limit = nil
search_string =
search_string
.strip
.split(/\s+/)
.map do |term|
if term =~ /limit:(\d+)/
limit = $1.to_i
parsed
.map do |key, value|
if key == "search_query"
value
elsif key == "limit"
limit = value.to_i
nil
else
term
"#{key}:#{value}"
end
end
.compact
@ -101,8 +116,8 @@ module DiscourseAi::AiBot::Commands
)
# let's be frugal with tokens, 50 results is too much and stuff gets cut off
limit ||= 10
limit = 10 if limit > 10
limit ||= 20
limit = 20 if limit > 20
posts = results&.posts || []
posts = posts[0..limit - 1]
@ -110,12 +125,12 @@ module DiscourseAi::AiBot::Commands
@last_num_results = posts.length
if posts.blank?
"No results found"
[]
else
format_results(posts) do |post|
{
title: post.topic.title,
url: post.url,
url: Discourse.base_path + post.url,
excerpt: post.excerpt,
created: post.created_at,
}

View File

@ -8,7 +8,23 @@ module DiscourseAi::AiBot::Commands
end
def desc
"!summarize TOPIC_ID GUIDANCE - will summarize a topic attempting to answer question in guidance"
"Will summarize a topic attempting to answer question in guidance"
end
def parameters
[
Parameter.new(
name: "topic_id",
description: "The discourse topic id to summarize",
type: "integer",
required: true,
),
Parameter.new(
name: "guidance",
description: "Special guidance on how to summarize the topic",
type: "string",
),
]
end
end

View File

@ -8,7 +8,11 @@ module DiscourseAi::AiBot::Commands
end
def desc
"!tags - will list the 100 most popular tags on the current discourse instance"
"Will list the 100 most popular tags on the current discourse instance"
end
def parameters
[]
end
end

View File

@ -10,6 +10,17 @@ module DiscourseAi::AiBot::Commands
def desc
"!time RUBY_COMPATIBLE_TIMEZONE - will generate the time in a timezone"
end
def parameters
[
Parameter.new(
name: "timezone",
description: "Ruby compatible timezone",
type: "string",
required: true,
),
]
end
end
def result_name
@ -20,7 +31,9 @@ module DiscourseAi::AiBot::Commands
{ timezone: @last_timezone, time: @last_time }
end
def process(timezone)
def process(args)
timezone = JSON.parse(args)["timezone"]
time =
begin
Time.now.in_time_zone(timezone)

View File

@ -18,7 +18,7 @@ module DiscourseAi
if bot_user.id == DiscourseAi::AiBot::EntryPoint::GPT4_ID
8192 - 3500
else
4096 - 2000
16_384 - 2000
end
end
@ -46,9 +46,11 @@ module DiscourseAi
temperature: temperature,
top_p: top_p,
max_tokens: max_tokens,
functions: available_functions,
) { |key, old_value, new_value| new_value.nil? ? old_value : new_value }
model = prefer_low_cost ? "gpt-3.5-turbo" : model_for
model = model_for(low_cost: prefer_low_cost)
DiscourseAi::Inference::OpenAiCompletions.perform!(prompt, model, **params, &blk)
end
@ -56,44 +58,87 @@ module DiscourseAi
DiscourseAi::Tokenizer::OpenAiTokenizer.tokenize(text)
end
def available_commands
if bot_user.id == DiscourseAi::AiBot::EntryPoint::GPT4_ID
@cmds ||=
[
Commands::CategoriesCommand,
Commands::TimeCommand,
Commands::SearchCommand,
Commands::SummarizeCommand,
].tap do |cmds|
cmds << Commands::TagsCommand if SiteSetting.tagging_enabled
cmds << Commands::ImageCommand if SiteSetting.ai_stability_api_key.present?
if SiteSetting.ai_google_custom_search_api_key.present? &&
SiteSetting.ai_google_custom_search_cx.present?
cmds << Commands::GoogleCommand
end
def available_functions
# note if defined? can be a problem in test
# this can never be nil so it is safe
return @available_functions if @available_functions
functions = []
functions =
available_commands.map do |command|
function =
DiscourseAi::Inference::OpenAiCompletions::Function.new(
name: command.name,
description: command.desc,
)
command.parameters.each do |parameter|
function.add_parameter(
name: parameter.name,
type: parameter.type,
description: parameter.description,
required: parameter.required,
)
end
else
[]
end
function
end
@available_functions = functions
end
def available_commands
@cmds ||=
[
Commands::CategoriesCommand,
Commands::TimeCommand,
Commands::SearchCommand,
Commands::SummarizeCommand,
].tap do |cmds|
cmds << Commands::TagsCommand if SiteSetting.tagging_enabled
cmds << Commands::ImageCommand if SiteSetting.ai_stability_api_key.present?
if SiteSetting.ai_google_custom_search_api_key.present? &&
SiteSetting.ai_google_custom_search_cx.present?
cmds << Commands::GoogleCommand
end
end
end
def model_for(low_cost: false)
return "gpt-4-0613" if bot_user.id == DiscourseAi::AiBot::EntryPoint::GPT4_ID && !low_cost
"gpt-3.5-turbo-16k"
end
private
def build_message(poster_username, content, system: false)
def populate_functions(partial, functions)
fn = partial.dig(:choices, 0, :delta, :function_call)
if fn
functions.add_function(fn[:name]) if fn[:name].present?
functions.add_argument_fragment(fn[:arguments]) if fn[:arguments].present?
end
end
def build_message(poster_username, content, function: false, system: false)
is_bot = poster_username == bot_user.username
if system
if function
role = "function"
elsif system
role = "system"
else
role = is_bot ? "assistant" : "user"
end
{ role: role, content: is_bot ? content : "#{poster_username}: #{content}" }
end
result = { role: role, content: content }
def model_for
return "gpt-4" if bot_user.id == DiscourseAi::AiBot::EntryPoint::GPT4_ID
"gpt-3.5-turbo"
if function
result[:name] = poster_username
elsif !system && poster_username != bot_user.username
# Open AI restrict name to 64 chars and only A-Za-z._ (work around)
result[:content] = "#{poster_username}: #{content}"
end
result
end
def get_delta(partial, _context)

View File

@ -5,6 +5,50 @@ module ::DiscourseAi
class OpenAiCompletions
TIMEOUT = 60
class Function
  # Builder for an OpenAI function definition. Parameters are
  # collected via #add_parameter and serialized into the JSON-schema
  # shape the chat completions API expects ("functions" payload).
  attr_reader :name, :description, :parameters, :type

  def initialize(name:, description:, type: nil)
    @name = name
    @description = description
    @type = type || "object" # OpenAI defaults function parameters to an object schema
    @parameters = []
  end

  # Record one parameter; enum and required are optional.
  def add_parameter(name:, type:, description:, enum: nil, required: false)
    parameters << {
      name: name,
      type: type,
      description: description,
      enum: enum,
      required: required,
    }
  end

  def to_json(*args)
    as_json.to_json(*args)
  end

  # Serialize to the OpenAI function-schema hash: properties keyed by
  # parameter name, with required names hoisted into a top-level list.
  def as_json
    required_params = parameters.filter_map { |p| p[:name] if p[:required] }

    properties =
      parameters.each_with_object({}) do |parameter, acc|
        schema = { type: parameter[:type], description: parameter[:description] }
        schema[:enum] = parameter[:enum] if parameter[:enum]
        acc[parameter[:name]] = schema
      end

    params = { type: @type, properties: properties }
    params[:required] = required_params if required_params.present?
    { name: name, description: description, parameters: params }
  end
end
CompletionFailed = Class.new(StandardError)
def self.perform!(
@ -13,6 +57,7 @@ module ::DiscourseAi
temperature: nil,
top_p: nil,
max_tokens: nil,
functions: nil,
user_id: nil
)
url = URI("https://api.openai.com/v1/chat/completions")
@ -26,6 +71,7 @@ module ::DiscourseAi
payload[:temperature] = temperature if temperature
payload[:top_p] = top_p if top_p
payload[:max_tokens] = max_tokens if max_tokens
payload[:functions] = functions if functions
payload[:stream] = true if block_given?
Net::HTTP.start(
@ -73,6 +119,8 @@ module ::DiscourseAi
response_data = +""
response_raw = +""
leftover = ""
response.read_body do |chunk|
if cancelled
http.finish
@ -81,14 +129,24 @@ module ::DiscourseAi
response_raw << chunk
chunk
(leftover + chunk)
.split("\n")
.each do |line|
data = line.split("data: ", 2)[1]
next if !data || data == "[DONE]"
next if cancelled
if !cancelled && partial = JSON.parse(data, symbolize_names: true)
partial = nil
begin
partial = JSON.parse(data, symbolize_names: true)
leftover = ""
rescue JSON::ParserError
leftover = line
end
if partial
response_data << partial.dig(:choices, 0, :delta, :content).to_s
response_data << partial.dig(:choices, 0, :delta, :function_call).to_s
yield partial, cancel
end

View File

@ -13,14 +13,11 @@ RSpec.describe DiscourseAi::AiBot::AnthropicBot do
context = {}
reply = +""
reply << subject.get_delta({ completion: "\n\nAssist" }, context)
expect(reply).to eq("")
reply << subject.get_delta({ completion: "Hello " }, context)
expect(reply).to eq("Hello ")
reply << subject.get_delta({ completion: "\n\nAssistant: test" }, context)
expect(reply).to eq("test")
reply << subject.get_delta({ completion: "\n\nAssistant: test\nworld" }, context)
expect(reply).to eq("test\nworld")
reply << subject.get_delta({ completion: "Hello world" }, context)
expect(reply).to eq("Hello world")
end
end
end

View File

@ -44,28 +44,31 @@ RSpec.describe DiscourseAi::AiBot::Bot do
bot.system_prompt_style!(:simple)
bot.max_commands_per_reply = 2
expected_response =
"ok, searching...\n!search test search\n!search test2 search\n!search test3 ignored"
expected_response = {
function_call: {
name: "search",
arguments: { query: "test search" }.to_json,
},
}
prompt = bot.bot_prompt_with_topic_context(second_post)
req_opts = bot.reply_params.merge({ functions: bot.available_functions, stream: true })
OpenAiCompletionsInferenceStubs.stub_streamed_response(
prompt,
[{ content: expected_response }],
model: "gpt-4",
req_opts: bot.reply_params.merge(stream: true),
[expected_response],
model: bot.model_for,
req_opts: req_opts,
)
prompt << { role: "assistant", content: "!search test search" }
prompt << { role: "user", content: "results: No results found" }
prompt << { role: "assistant", content: "!search test2 search" }
prompt << { role: "user", content: "results: No results found" }
prompt << { role: "function", content: "[]", name: "search" }
OpenAiCompletionsInferenceStubs.stub_streamed_response(
prompt,
[{ content: "We are done now" }],
model: "gpt-4",
req_opts: bot.reply_params.merge(stream: true),
[content: "I found nothing, sorry"],
model: bot.model_for,
req_opts: req_opts,
)
bot.reply_to(second_post)
@ -75,10 +78,9 @@ RSpec.describe DiscourseAi::AiBot::Bot do
expect(last.raw).to include("<details>")
expect(last.raw).to include("<summary>Search</summary>")
expect(last.raw).not_to include("translation missing")
expect(last.raw).to include("ok, searching...")
expect(last.raw).to include("We are done now")
expect(last.raw).to include("I found nothing")
expect(last.post_custom_prompt.custom_prompt.to_s).to include("We are done now")
expect(last.post_custom_prompt.custom_prompt.to_s).to include("I found nothing")
end
end
@ -89,9 +91,9 @@ RSpec.describe DiscourseAi::AiBot::Bot do
it "updates the title using bot suggestions" do
OpenAiCompletionsInferenceStubs.stub_response(
[bot.title_prompt(second_post)],
bot.title_prompt(second_post),
expected_response,
model: "gpt-4",
model: bot.model_for,
req_opts: {
temperature: 0.7,
top_p: 0.9,

View File

@ -8,8 +8,8 @@ RSpec.describe DiscourseAi::AiBot::Commands::CategoriesCommand do
Fabricate(:category, name: "america", posts_year: 999)
info = DiscourseAi::AiBot::Commands::CategoriesCommand.new(nil, nil).process(nil)
expect(info).to include("america")
expect(info).to include("999")
expect(info.to_s).to include("america")
expect(info.to_s).to include("999")
end
end
end

View File

@ -14,8 +14,9 @@ RSpec.describe DiscourseAi::AiBot::Commands::Command do
formatted =
command.format_results(rows, column_names) { |row| ["row ¦ 1", row + 1, "a|b,\nc"] }
expect(formatted.split("\n").length).to eq(6)
expect(formatted).to include("a|b, c")
expect(formatted[:column_names].length).to eq(3)
expect(formatted[:rows].length).to eq(5)
expect(formatted.to_s).to include("a|b,\\nc")
end
it "can also generate results by returning hash per row" do

View File

@ -33,7 +33,7 @@ RSpec.describe DiscourseAi::AiBot::Commands::GoogleCommand do
).to_return(status: 200, body: json_text, headers: {})
google = described_class.new(bot_user, post)
info = google.process("some search term")
info = google.process({ query: "some search term" }.to_json).to_json
expect(google.description_args[:count]).to eq(1)
expect(info).to include("title1")

View File

@ -13,8 +13,19 @@ RSpec.describe DiscourseAi::AiBot::Commands::SearchCommand do
post1 = Fabricate(:post)
search = described_class.new(bot_user, post1)
results = search.process("order:fake ABDDCDCEDGDG")
expect(results).to eq("No results found")
results = search.process({ query: "order:fake ABDDCDCEDGDG" }.to_json)
expect(results).to eq([])
end
it "supports subfolder properly" do
Discourse.stubs(:base_path).returns("/subfolder")
post1 = Fabricate(:post)
search = described_class.new(bot_user, post1)
results = search.process({ limit: 1, user: post1.user.username }.to_json)
expect(results[:rows].to_s).to include("/subfolder" + post1.url)
end
it "can handle limits" do
@ -25,14 +36,14 @@ RSpec.describe DiscourseAi::AiBot::Commands::SearchCommand do
# search has no built in support for limit: so handle it from the outside
search = described_class.new(bot_user, post1)
results = search.process("@#{post1.user.username} limit:2")
results = search.process({ limit: 2, user: post1.user.username }.to_json)
# title + 2 rows
expect(results.split("\n").length).to eq(3)
expect(results[:column_names].length).to eq(4)
expect(results[:rows].length).to eq(2)
# just searching for everything
results = search.process("order:latest_topic")
expect(results.split("\n").length).to be > 1
results = search.process({ order: "latest_topic" }.to_json)
expect(results[:rows].length).to be > 1
end
end
end

View File

@ -12,8 +12,8 @@ RSpec.describe DiscourseAi::AiBot::Commands::TagsCommand do
info = DiscourseAi::AiBot::Commands::TagsCommand.new(nil, nil).process(nil)
expect(info).to include("america")
expect(info).not_to include("not_here")
expect(info.to_s).to include("america")
expect(info.to_s).not_to include("not_here")
end
end
end

View File

@ -19,6 +19,7 @@ RSpec.describe Jobs::CreateAiReply do
before do
bot_user = User.find(DiscourseAi::AiBot::EntryPoint::GPT3_5_TURBO_ID)
bot = DiscourseAi::AiBot::Bot.as(bot_user)
# time needs to be frozen so time in prompt does not drift
freeze_time
@ -26,10 +27,12 @@ RSpec.describe Jobs::CreateAiReply do
OpenAiCompletionsInferenceStubs.stub_streamed_response(
DiscourseAi::AiBot::OpenAiBot.new(bot_user).bot_prompt_with_topic_context(post),
deltas,
model: bot.model_for,
req_opts: {
temperature: 0.4,
top_p: 0.9,
max_tokens: 1500,
functions: bot.available_functions,
stream: true,
},
)
@ -66,7 +69,7 @@ RSpec.describe Jobs::CreateAiReply do
end
context "when chatting with Claude from Anthropic" do
let(:claude_response) { "Assistant: #{expected_response}" }
let(:claude_response) { "#{expected_response}" }
let(:deltas) { claude_response.split(" ").map { |w| "#{w} " } }
before do

View File

@ -33,8 +33,6 @@ RSpec.describe DiscourseAi::AiBot::OpenAiBot do
it "trims the prompt" do
prompt_messages = subject.bot_prompt_with_topic_context(post_1)
expect(prompt_messages[-2][:role]).to eq("assistant")
expect(prompt_messages[-1][:role]).to eq("user")
# trimming is tricky... it needs to account for system message as
# well... just make sure we trim for now
expect(prompt_messages[-1][:content].length).to be < post_1.raw.length

View File

@ -6,6 +6,98 @@ require_relative "../../support/openai_completions_inference_stubs"
describe DiscourseAi::Inference::OpenAiCompletions do
before { SiteSetting.ai_openai_api_key = "abc-123" }
it "supports function calling" do
prompt = [role: "system", content: "you are weatherbot"]
prompt << { role: "user", content: "what is the weather in sydney?" }
functions = []
function =
DiscourseAi::Inference::OpenAiCompletions::Function.new(
name: "get_weather",
description: "Get the weather in a city",
)
function.add_parameter(
name: "location",
type: "string",
description: "the city name",
required: true,
)
function.add_parameter(
name: "unit",
type: "string",
description: "the unit of measurement celcius c or fahrenheit f",
enum: %w[c f],
required: true,
)
functions << function
function_calls = []
current_function_call = nil
deltas = [
{ role: "assistant" },
{ function_call: { name: "get_weather", arguments: "" } },
{ function_call: { arguments: "{ \"location\": " } },
{ function_call: { arguments: "\"sydney\", \"unit\": \"c\" }" } },
]
OpenAiCompletionsInferenceStubs.stub_streamed_response(
prompt,
deltas,
model: "gpt-3.5-turbo-0613",
req_opts: {
functions: functions,
stream: true,
},
)
DiscourseAi::Inference::OpenAiCompletions.perform!(
prompt,
"gpt-3.5-turbo-0613",
functions: functions,
) do |json, cancel|
fn = json.dig(:choices, 0, :delta, :function_call)
if fn && fn[:name]
current_function_call = { name: fn[:name], arguments: +fn[:arguments].to_s.dup }
function_calls << current_function_call
elsif fn && fn[:arguments] && current_function_call
current_function_call[:arguments] << fn[:arguments]
end
end
expect(function_calls.length).to eq(1)
expect(function_calls[0][:name]).to eq("get_weather")
expect(JSON.parse(function_calls[0][:arguments])).to eq(
{ "location" => "sydney", "unit" => "c" },
)
prompt << { role: "function", name: "get_weather", content: 22.to_json }
OpenAiCompletionsInferenceStubs.stub_response(
prompt,
"The current temperature in Sydney is 22 degrees Celsius.",
model: "gpt-3.5-turbo-0613",
req_opts: {
functions: functions,
},
)
result =
DiscourseAi::Inference::OpenAiCompletions.perform!(
prompt,
"gpt-3.5-turbo-0613",
functions: functions,
)
expect(result.dig(:choices, 0, :message, :content)).to eq(
"The current temperature in Sydney is 22 degrees Celsius.",
)
end
it "can complete a trivial prompt" do
response_text = "1. Serenity\\n2. Laughter\\n3. Adventure"
prompt = [role: "user", content: "write 3 words"]