DEV: port directory structure to Zeitwerk (#319)

Prior to this change we relied on explicit loading for all files in Discourse AI.

This had a few downsides:

- Busywork whenever you add a file (an extra require relative)
- We were not keeping to conventions internally ... some places were OpenAI others are OpenAi
- Autoloader did not work, which led to lots of broken full-application reloads when developing.

This moves all of DiscourseAI into a Zeitwerk compatible structure.

It also leaves some minimal amount of manual loading (automation - which is loading into an existing namespace that may or may not be there)

To avoid needing /lib/discourse_ai/... we mount a namespace thus we are able to keep /lib pointed at ::DiscourseAi

Various files were renamed to get around zeitwerk rules and minimize usage of custom inflections

Though we can get custom inflections to work, it is not worth it: it would require a Discourse core patch, which would create a hard dependency.
This commit is contained in:
Sam 2023-11-29 15:17:46 +11:00 committed by GitHub
parent 0b9947771c
commit 6ddc17fd61
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
121 changed files with 274 additions and 353 deletions

View File

@ -11,7 +11,7 @@ module Jobs
return if post.uploads.none? { |u| FileHelper.is_supported_image?(u.url) }
DiscourseAi::PostClassificator.new(DiscourseAi::NSFW::NSFWClassification.new).classify!(post)
DiscourseAi::PostClassificator.new(DiscourseAi::Nsfw::Classification.new).classify!(post)
end
end
end

View File

@ -53,6 +53,8 @@ en:
ai_helper_allowed_in_pm: "Enable the composer's AI helper in PMs."
ai_helper_model: "Model to use for the AI helper."
ai_helper_custom_prompts_allowed_groups: "Users in these groups will see the custom prompt option in the AI helper."
ai_helper_automatic_chat_thread_title_delay: "Delay in minutes before the AI helper automatically sets the chat thread title."
ai_helper_automatic_chat_thread_title: "Automatically set the chat thread titles based on thread contents."
ai_embeddings_enabled: "Enable the embeddings module."
ai_embeddings_discourse_service_api_endpoint: "URL where the API is running for the embeddings module"

View File

@ -0,0 +1,18 @@
# frozen_string_literal: true

module DiscourseAi
  module AiBot
    module Commands
      # Read-only value object describing a single argument accepted by a
      # bot command: its name, human-readable description, JSON-schema-style
      # type, optional enum of allowed values, whether it is required, and
      # (for array types) the element type.
      class Parameter
        attr_reader :item_type, :name, :description, :type, :enum, :required

        # enum, required and item_type are optional; required defaults to false.
        def initialize(name:, description:, type:, enum: nil, required: false, item_type: nil)
          @name, @description, @type = name, description, type
          @enum, @required, @item_type = enum, required, item_type
        end
      end
    end
  end
end

View File

@ -27,36 +27,6 @@ module DiscourseAi
end
end
def load_files
require_relative "jobs/regular/create_ai_reply"
require_relative "jobs/regular/update_ai_bot_pm_title"
require_relative "bot"
require_relative "anthropic_bot"
require_relative "open_ai_bot"
require_relative "commands/command"
require_relative "commands/search_command"
require_relative "commands/categories_command"
require_relative "commands/tags_command"
require_relative "commands/time_command"
require_relative "commands/summarize_command"
require_relative "commands/image_command"
require_relative "commands/google_command"
require_relative "commands/read_command"
require_relative "commands/setting_context_command"
require_relative "commands/search_settings_command"
require_relative "commands/db_schema_command"
require_relative "commands/dall_e_command"
require_relative "personas/persona"
require_relative "personas/artist"
require_relative "personas/general"
require_relative "personas/sql_helper"
require_relative "personas/settings_explorer"
require_relative "personas/researcher"
require_relative "personas/creative"
require_relative "personas/dall_e_3"
require_relative "site_settings_extension"
end
def inject_into(plugin)
plugin.on(:site_setting_changed) do |name, _old_value, _new_value|
if name == :ai_bot_enabled_chat_bots || name == :ai_bot_enabled
@ -76,7 +46,7 @@ module DiscourseAi
scope.user.in_any_groups?(SiteSetting.ai_bot_allowed_groups_map)
end,
) do
Personas
DiscourseAi::AiBot::Personas
.all(user: scope.user)
.map do |persona|
{ id: persona.id, name: persona.name, description: persona.description }
@ -135,8 +105,8 @@ module DiscourseAi
post.topic.custom_fields[REQUIRE_TITLE_UPDATE] = true
post.topic.save_custom_fields
end
Jobs.enqueue(:create_ai_reply, post_id: post.id, bot_user_id: bot_id)
Jobs.enqueue_in(
::Jobs.enqueue(:create_ai_reply, post_id: post.id, bot_user_id: bot_id)
::Jobs.enqueue_in(
5.minutes,
:update_ai_bot_pm_title,
post_id: post.id,

46
lib/ai_bot/personas.rb Normal file
View File

@ -0,0 +1,46 @@
# frozen_string_literal: true

module DiscourseAi
  module AiBot
    # Registry of AI bot personas: the built-in (system) personas with their
    # reserved negative ids, plus lookup helpers over AiPersona records.
    module Personas
      # Built-in personas, keyed by class, mapped to fixed negative ids.
      def self.system_personas
        @system_personas ||= {
          Personas::General => -1,
          Personas::SqlHelper => -2,
          Personas::Artist => -3,
          Personas::SettingsExplorer => -4,
          Personas::Researcher => -5,
          Personas::Creative => -6,
          Personas::DallE3 => -7,
        }
      end

      # Inverse of system_personas: reserved id => persona class.
      def self.system_personas_by_id
        @system_personas_by_id ||= system_personas.invert
      end

      # Every persona the given user may use. System personas are excluded
      # when any of their required commands is currently unavailable.
      def self.all(user:)
        # Resolved on every call because site settings may change at runtime
        # and alter which commands are available.
        available_commands = Persona.all_available_commands

        AiPersona.all_personas.select do |persona|
          next false unless user.in_any_groups?(persona.allowed_group_ids)
          next true unless persona.system

          required = persona.new.required_commands
          required == [] || (required - available_commands).empty?
        end
      end

      # First persona visible to the user matching either the id or the name.
      def self.find_by(id: nil, name: nil, user:)
        all(user: user).detect { |persona| persona.id == id || persona.name == name }
      end
    end
  end
end

View File

@ -3,46 +3,6 @@
module DiscourseAi
module AiBot
module Personas
def self.system_personas
@system_personas ||= {
Personas::General => -1,
Personas::SqlHelper => -2,
Personas::Artist => -3,
Personas::SettingsExplorer => -4,
Personas::Researcher => -5,
Personas::Creative => -6,
Personas::DallE3 => -7,
}
end
def self.system_personas_by_id
@system_personas_by_id ||= system_personas.invert
end
def self.all(user:)
personas =
AiPersona.all_personas.filter { |persona| user.in_any_groups?(persona.allowed_group_ids) }
# this needs to be dynamic cause site settings may change
all_available_commands = Persona.all_available_commands
personas.filter do |persona|
if persona.system
instance = persona.new
(
instance.required_commands == [] ||
(instance.required_commands - all_available_commands).empty?
)
else
true
end
end
end
def self.find_by(id: nil, name: nil, user:)
all(user: user).find { |persona| persona.id == id || persona.name == name }
end
class Persona
def self.name
I18n.t("discourse_ai.ai_bot.personas.#{to_s.demodulize.underscore}.name")

View File

@ -25,7 +25,7 @@ module DiscourseAi
end
def generate_and_send_prompt(completion_prompt, input, user)
llm = DiscourseAi::Completions::LLM.proxy(SiteSetting.ai_helper_model)
llm = DiscourseAi::Completions::Llm.proxy(SiteSetting.ai_helper_model)
generic_prompt = completion_prompt.messages_with_input(input)

View File

@ -2,15 +2,6 @@
module DiscourseAi
module AiHelper
class EntryPoint
def load_files
require_relative "chat_thread_titler"
require_relative "jobs/regular/generate_chat_thread_title"
require_relative "assistant"
require_relative "painter"
require_relative "semantic_categorizer"
require_relative "topic_helper"
end
def inject_into(plugin)
plugin.register_seedfu_fixtures(
Rails.root.join("plugins", "discourse-ai", "db", "fixtures", "ai_helper"),
@ -22,7 +13,7 @@ module DiscourseAi
plugin.on(:chat_thread_created) do |thread|
next unless SiteSetting.composer_ai_helper_enabled
next unless SiteSetting.ai_helper_automatic_chat_thread_title
Jobs.enqueue_in(
::Jobs.enqueue_in(
SiteSetting.ai_helper_automatic_chat_thread_title_delay.minutes,
:generate_chat_thread_title,
thread_id: thread.id,

View File

@ -35,7 +35,7 @@ module DiscourseAi
You'll find the post between <input></input> XML tags.
TEXT
DiscourseAi::Completions::LLM.proxy(SiteSetting.ai_helper_model).completion!(prompt, user)
DiscourseAi::Completions::Llm.proxy(SiteSetting.ai_helper_model).completion!(prompt, user)
end
end
end

View File

@ -3,7 +3,7 @@
module DiscourseAi
module Completions
module Dialects
class ChatGPT
class ChatGpt
def self.can_translate?(model_name)
%w[gpt-3.5-turbo gpt-4 gpt-3.5-turbo-16k gpt-4-32k].include?(model_name)
end

View File

@ -13,9 +13,9 @@ module DiscourseAi
[
DiscourseAi::Completions::Endpoints::AwsBedrock,
DiscourseAi::Completions::Endpoints::Anthropic,
DiscourseAi::Completions::Endpoints::OpenAI,
DiscourseAi::Completions::Endpoints::Huggingface,
].detect(-> { raise DiscourseAi::Completions::LLM::UNKNOWN_MODEL }) do |ek|
DiscourseAi::Completions::Endpoints::OpenAi,
DiscourseAi::Completions::Endpoints::HuggingFace,
].detect(-> { raise DiscourseAi::Completions::Llm::UNKNOWN_MODEL }) do |ek|
ek.can_contact?(model_name)
end
end

View File

@ -3,7 +3,7 @@
module DiscourseAi
module Completions
module Endpoints
class Huggingface < Base
class HuggingFace < Base
def self.can_contact?(model_name)
%w[StableBeluga2 Upstage-Llama-2-*-instruct-v2 Llama2-*-chat-hf].include?(model_name)
end

View File

@ -3,7 +3,7 @@
module DiscourseAi
module Completions
module Endpoints
class OpenAI < Base
class OpenAi < Base
def self.can_contact?(model_name)
%w[gpt-3.5-turbo gpt-4 gpt-3.5-turbo-16k gpt-4-32k].include?(model_name)
end

View File

@ -1,26 +0,0 @@
# frozen_string_literal: true
module DiscourseAi
  module Completions
    # Plugin entry point for the Completions module.
    class EntryPoint
      # Explicitly requires every file belonging to the Completions module
      # (dialects, endpoints and the LLM facade). Order matters: base
      # endpoint classes are required before their subclasses.
      def load_files
        require_relative "dialects/chat_gpt"
        require_relative "dialects/llama2_classic"
        require_relative "dialects/orca_style"
        require_relative "dialects/claude"
        require_relative "endpoints/canned_response"
        require_relative "endpoints/base"
        require_relative "endpoints/anthropic"
        require_relative "endpoints/aws_bedrock"
        require_relative "endpoints/open_ai"
        require_relative "endpoints/hugging_face"
        require_relative "llm"
      end
      # This module registers no plugin hooks; the argument is ignored.
      def inject_into(_)
      end
    end
  end
end

View File

@ -14,7 +14,7 @@
#
module DiscourseAi
module Completions
class LLM
class Llm
UNKNOWN_MODEL = Class.new(StandardError)
def self.with_prepared_responses(responses)
@ -27,7 +27,7 @@ module DiscourseAi
dialects = [
DiscourseAi::Completions::Dialects::Claude,
DiscourseAi::Completions::Dialects::Llama2Classic,
DiscourseAi::Completions::Dialects::ChatGPT,
DiscourseAi::Completions::Dialects::ChatGpt,
DiscourseAi::Completions::Dialects::OrcaStyle,
]

View File

@ -3,21 +3,6 @@
module DiscourseAi
module Embeddings
class EntryPoint
def load_files
require_relative "vector_representations/base"
require_relative "vector_representations/all_mpnet_base_v2"
require_relative "vector_representations/text_embedding_ada_002"
require_relative "vector_representations/multilingual_e5_large"
require_relative "vector_representations/bge_large_en"
require_relative "strategies/truncation"
require_relative "jobs/regular/generate_embeddings"
require_relative "jobs/scheduled/embeddings_backfill"
require_relative "semantic_related"
require_relative "semantic_topic_query"
require_relative "semantic_search"
end
def inject_into(plugin)
# Include random topics in the suggested list *only* if there are no related topics.
plugin.register_modifier(

View File

@ -103,14 +103,14 @@ module DiscourseAi
TEXT
input: <<~TEXT,
Using this description, write a forum post about the subject inside the <input></input> XML tags:
<input>#{search_term}</input>
TEXT
post_insts: "Put the forum post between <ai></ai> tags.",
}
llm_response =
DiscourseAi::Completions::LLM.proxy(
DiscourseAi::Completions::Llm.proxy(
SiteSetting.ai_embeddings_semantic_search_hyde_model,
).completion!(prompt, @guardian.user)

View File

@ -1,8 +1,8 @@
# frozen_string_literal: true
module DiscourseAi
module NSFW
class NSFWClassification
module Nsfw
class Classification
def type
:nsfw
end

View File

@ -1,18 +1,13 @@
# frozen_string_literal: true
module DiscourseAi
module NSFW
module Nsfw
class EntryPoint
def load_files
require_relative "nsfw_classification"
require_relative "jobs/regular/evaluate_post_uploads"
end
def inject_into(plugin)
nsfw_detection_cb =
Proc.new do |post|
if SiteSetting.ai_nsfw_detection_enabled &&
DiscourseAi::NSFW::NSFWClassification.new.can_classify?(post)
DiscourseAi::Nsfw::Classification.new.can_classify?(post)
Jobs.enqueue(:evaluate_post_uploads, post_id: post.id)
end
end

View File

@ -3,11 +3,6 @@
module DiscourseAi
module Sentiment
class EntryPoint
def load_files
require_relative "sentiment_classification"
require_relative "jobs/regular/post_sentiment_analysis"
end
def inject_into(plugin)
sentiment_analysis_cb =
Proc.new do |post|
@ -32,11 +27,11 @@ module DiscourseAi
grouped_sentiments =
DB.query(
<<~SQL,
SELECT
SELECT
DATE_TRUNC('day', p.created_at)::DATE AS posted_at,
#{sentiment_count_sql.call("positive")},
-#{sentiment_count_sql.call("negative")}
FROM
FROM
classification_results AS cr
INNER JOIN posts p ON p.id = cr.target_id AND cr.target_type = 'Post'
INNER JOIN topics t ON t.id = p.topic_id
@ -84,7 +79,7 @@ module DiscourseAi
grouped_emotions =
DB.query(
<<~SQL,
SELECT
SELECT
u.trust_level AS trust_level,
#{emotion_count_clause.call("sadness")},
#{emotion_count_clause.call("surprise")},

View File

@ -1,103 +0,0 @@
# frozen_string_literal: true
module DiscourseAi
  module Tokenizer
    # Abstract base class. Subclasses supply a concrete `tokenizer` object;
    # everything else (tokenize/size/truncate/can_expand_tokens?) is built
    # on top of it.
    class BasicTokenizer
      class << self
        # Must be overridden to return the underlying tokenizer instance.
        def tokenizer
          raise NotImplementedError
        end
        # Returns the token strings for `text`.
        def tokenize(text)
          tokenizer.encode(text).tokens
        end
        # Number of tokens in `text`.
        def size(text)
          tokenize(text).size
        end
        # Returns `text` cut down to at most `max_length` tokens.
        def truncate(text, max_length)
          # Fast track the common case where the text is already short enough.
          return text if text.size < max_length
          tokenizer.decode(tokenizer.encode(text).ids.take(max_length))
        end
        # True when `text` plus `addition` still fits within `max_length` tokens.
        def can_expand_tokens?(text, addition, max_length)
          return true if text.size + addition.size < max_length
          tokenizer.encode(text).ids.length + tokenizer.encode(addition).ids.length < max_length
        end
      end
    end
    # Tokenizer using the bert-base-uncased vocabulary file.
    class BertTokenizer < BasicTokenizer
      def self.tokenizer
        @@tokenizer ||=
          Tokenizers.from_file("./plugins/discourse-ai/tokenizers/bert-base-uncased.json")
      end
    end
    # Tokenizer using Anthropic's Claude v1 vocabulary file.
    class AnthropicTokenizer < BasicTokenizer
      def self.tokenizer
        @@tokenizer ||=
          Tokenizers.from_file("./plugins/discourse-ai/tokenizers/claude-v1-tokenization.json")
      end
    end
    # Tokenizer for the all-mpnet-base-v2 embeddings model.
    class AllMpnetBaseV2Tokenizer < BasicTokenizer
      def self.tokenizer
        @@tokenizer ||=
          Tokenizers.from_file("./plugins/discourse-ai/tokenizers/all-mpnet-base-v2.json")
      end
    end
    # Tokenizer for Llama 2 chat models.
    class Llama2Tokenizer < BasicTokenizer
      def self.tokenizer
        @@tokenizer ||=
          Tokenizers.from_file("./plugins/discourse-ai/tokenizers/llama-2-70b-chat-hf.json")
      end
    end
    # Tokenizer for the multilingual-e5-large embeddings model.
    class MultilingualE5LargeTokenizer < BasicTokenizer
      def self.tokenizer
        @@tokenizer ||=
          Tokenizers.from_file("./plugins/discourse-ai/tokenizers/multilingual-e5-large.json")
      end
    end
    # Tokenizer for the bge-large-en embeddings model.
    class BgeLargeEnTokenizer < BasicTokenizer
      def self.tokenizer
        @@tokenizer ||= Tokenizers.from_file("./plugins/discourse-ai/tokenizers/bge-large-en.json")
      end
    end
    # OpenAI tokenizer backed by Tiktoken; overrides the base helpers because
    # Tiktoken's encode returns ids directly (no .tokens/.ids wrapper).
    class OpenAiTokenizer < BasicTokenizer
      class << self
        def tokenizer
          @@tokenizer ||= Tiktoken.get_encoding("cl100k_base")
        end
        def tokenize(text)
          tokenizer.encode(text)
        end
        def truncate(text, max_length)
          # Fast track the common case where the text is already short enough.
          return text if text.size < max_length
          tokenizer.decode(tokenize(text).take(max_length))
        rescue Tiktoken::UnicodeError
          # Truncation landed mid-codepoint; retry one token shorter.
          max_length = max_length - 1
          retry
        end
        def can_expand_tokens?(text, addition, max_length)
          return true if text.size + addition.size < max_length
          tokenizer.encode(text).length + tokenizer.encode(addition).length < max_length
        end
      end
    end
  end
end

View File

@ -3,18 +3,6 @@
module DiscourseAi
module Summarization
class EntryPoint
def load_files
require_relative "models/base"
require_relative "models/anthropic"
require_relative "models/discourse"
require_relative "models/open_ai"
require_relative "models/llama2"
require_relative "models/llama2_fine_tuned_orca_style"
require_relative "strategies/fold_content"
require_relative "strategies/truncate_content"
end
def inject_into(plugin)
foldable_models = [
Models::OpenAi.new("gpt-4", max_tokens: 8192),

View File

@ -19,7 +19,7 @@ module DiscourseAi
def summarize(content, user, &on_partial_blk)
opts = content.except(:contents)
llm = DiscourseAi::Completions::LLM.proxy(completion_model.model)
llm = DiscourseAi::Completions::Llm.proxy(completion_model.model)
chunks = split_into_chunks(llm.tokenizer, content[:contents])

View File

@ -0,0 +1,12 @@
# frozen_string_literal: true

module DiscourseAi
  module Tokenizer
    # Tokenizer for the all-mpnet-base-v2 embeddings model.
    class AllMpnetBaseV2Tokenizer < BasicTokenizer
      # Lazily loads and memoizes the underlying tokenizer.
      # Uses a class-level instance variable instead of a class variable:
      # `@@tokenizer` is resolved through the BasicTokenizer hierarchy and
      # risks colliding between sibling tokenizer subclasses.
      def self.tokenizer
        @tokenizer ||=
          Tokenizers.from_file("./plugins/discourse-ai/tokenizers/all-mpnet-base-v2.json")
      end
    end
  end
end

View File

@ -0,0 +1,12 @@
# frozen_string_literal: true

module DiscourseAi
  module Tokenizer
    # Tokenizer using Anthropic's Claude v1 vocabulary file.
    class AnthropicTokenizer < BasicTokenizer
      # Lazily loads and memoizes the underlying tokenizer.
      # Uses a class-level instance variable instead of a class variable:
      # `@@tokenizer` is resolved through the BasicTokenizer hierarchy and
      # risks colliding between sibling tokenizer subclasses.
      def self.tokenizer
        @tokenizer ||=
          Tokenizers.from_file("./plugins/discourse-ai/tokenizers/claude-v1-tokenization.json")
      end
    end
  end
end

View File

@ -0,0 +1,34 @@
# frozen_string_literal: true

module DiscourseAi
  module Tokenizer
    # Abstract base class for tokenizers. Subclasses implement `tokenizer`
    # to return an object responding to #encode/#decode; the helpers below
    # are all built on top of it.
    class BasicTokenizer
      # Concrete subclasses must return the underlying tokenizer instance.
      def self.tokenizer
        raise NotImplementedError
      end

      # Token strings for `text`.
      def self.tokenize(text)
        tokenizer.encode(text).tokens
      end

      # Number of tokens in `text`.
      def self.size(text)
        tokenize(text).size
      end

      # Returns `text` limited to at most `max_length` tokens.
      def self.truncate(text, max_length)
        # Fast track the common case where the text is already short enough.
        return text if text.size < max_length

        tokenizer.decode(tokenizer.encode(text).ids.take(max_length))
      end

      # True when appending `addition` to `text` stays under `max_length` tokens.
      def self.can_expand_tokens?(text, addition, max_length)
        return true if text.size + addition.size < max_length

        tokenizer.encode(text).ids.length + tokenizer.encode(addition).ids.length < max_length
      end
    end
  end
end

View File

@ -0,0 +1,12 @@
# frozen_string_literal: true

module DiscourseAi
  module Tokenizer
    # Tokenizer using the bert-base-uncased vocabulary file.
    class BertTokenizer < BasicTokenizer
      # Lazily loads and memoizes the underlying tokenizer.
      # Uses a class-level instance variable instead of a class variable:
      # `@@tokenizer` is resolved through the BasicTokenizer hierarchy and
      # risks colliding between sibling tokenizer subclasses.
      def self.tokenizer
        @tokenizer ||=
          Tokenizers.from_file("./plugins/discourse-ai/tokenizers/bert-base-uncased.json")
      end
    end
  end
end

View File

@ -0,0 +1,11 @@
# frozen_string_literal: true

module DiscourseAi
  module Tokenizer
    # Tokenizer for the bge-large-en embeddings model.
    class BgeLargeEnTokenizer < BasicTokenizer
      # Lazily loads and memoizes the underlying tokenizer.
      # Uses a class-level instance variable instead of a class variable:
      # `@@tokenizer` is resolved through the BasicTokenizer hierarchy and
      # risks colliding between sibling tokenizer subclasses.
      def self.tokenizer
        @tokenizer ||= Tokenizers.from_file("./plugins/discourse-ai/tokenizers/bge-large-en.json")
      end
    end
  end
end

View File

@ -0,0 +1,12 @@
# frozen_string_literal: true

module DiscourseAi
  module Tokenizer
    # Tokenizer for Llama 2 chat models.
    class Llama2Tokenizer < BasicTokenizer
      # Lazily loads and memoizes the underlying tokenizer.
      # Uses a class-level instance variable instead of a class variable:
      # `@@tokenizer` is resolved through the BasicTokenizer hierarchy and
      # risks colliding between sibling tokenizer subclasses.
      def self.tokenizer
        @tokenizer ||=
          Tokenizers.from_file("./plugins/discourse-ai/tokenizers/llama-2-70b-chat-hf.json")
      end
    end
  end
end

View File

@ -0,0 +1,12 @@
# frozen_string_literal: true

module DiscourseAi
  module Tokenizer
    # Tokenizer for the multilingual-e5-large embeddings model.
    class MultilingualE5LargeTokenizer < BasicTokenizer
      # Lazily loads and memoizes the underlying tokenizer.
      # Uses a class-level instance variable instead of a class variable:
      # `@@tokenizer` is resolved through the BasicTokenizer hierarchy and
      # risks colliding between sibling tokenizer subclasses.
      def self.tokenizer
        @tokenizer ||=
          Tokenizers.from_file("./plugins/discourse-ai/tokenizers/multilingual-e5-large.json")
      end
    end
  end
end

Some files were not shown because too many files have changed in this diff Show More