FEATURE: add inferred concepts system (#1330)

* FEATURE: add inferred concepts system

This commit adds a new inferred concepts system that:
- Creates a model for storing concept labels that can be applied to topics
- Provides AI personas for finding new concepts and matching existing ones
- Adds jobs for generating concepts from popular topics
- Includes a scheduled job that automatically processes engaging topics

* FEATURE: Extend inferred concepts to include posts

* Adds support for concepts to be inferred from and applied to posts
* Replaces daily task with one that handles both topics and posts
* Adds database migration for posts_inferred_concepts join table
* Updates PersonaContext to include inferred concepts



Co-authored-by: Roman Rizzi <rizziromanalejandro@gmail.com>
Co-authored-by: Keegan George <kgeorge13@gmail.com>
This commit is contained in:
Rafael dos Santos Silva 2025-06-02 14:29:20 -03:00 committed by GitHub
parent 4ce8973e56
commit 478f31de47
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
44 changed files with 2713 additions and 20 deletions

View File

@ -0,0 +1,70 @@
# frozen_string_literal: true
module Jobs
  # Background job that runs concept inference over an explicit list of
  # topics or posts, working through the ids in slices.
  class GenerateInferredConcepts < ::Jobs::Base
    sidekiq_options queue: "low"

    # Entry point: validates the arguments and processes the items slice by slice.
    #
    # @param args [Hash] Contains job arguments
    # @option args [String] :item_type Required - Type of items to process ('topics' or 'posts')
    # @option args [Array<Integer>] :item_ids Required - List of item IDs to process
    # @option args [Integer] :batch_size (100) Number of items to process in each batch
    # @option args [Boolean] :match_only (false) Only match against existing concepts without generating new ones
    def execute(args = {})
      item_ids = args[:item_ids]
      item_type = args[:item_type]
      return if item_ids.blank? || item_type.blank?

      unless %w[topics posts].include?(item_type)
        Rails.logger.error("Invalid item_type for GenerateInferredConcepts: #{item_type}")
        return
      end

      match_only = args[:match_only] || false
      slice_size = args[:batch_size] || 100

      # Slicing keeps the number of records loaded at once bounded.
      item_ids.each_slice(slice_size) { |ids| process_batch(ids, item_type, match_only) }
    end

    private

    # Loads one slice of records and processes them one at a time. A failure
    # on a single item is logged and does not stop the rest of the slice.
    def process_batch(item_ids, item_type, match_only)
      manager = DiscourseAi::InferredConcepts::Manager.new
      model = item_type.singularize.classify.constantize

      model.where(id: item_ids).each do |item|
        process_item(item, item_type, match_only, manager)
      rescue => e
        Rails.logger.error(
          "Error generating concepts from #{item_type.singularize} #{item.id}: #{e.message}\n#{e.backtrace.join("\n")}",
        )
      end
    end

    # Dispatches to the manager call that fits the item type and the mode
    # (match against existing concepts vs. generate new ones).
    def process_item(item, item_type, match_only, manager)
      topic_item = item_type == "topics"

      if match_only
        return(
          if topic_item
            manager.match_topic_to_concepts(item)
          else
            manager.match_post_to_concepts(item)
          end
        )
      end

      if topic_item
        manager.generate_concepts_from_topic(item)
      else
        manager.generate_concepts_from_post(item)
      end
    end
  end
end

View File

@ -0,0 +1,87 @@
# frozen_string_literal: true
module Jobs
  # Daily scheduled job: picks popular topics and posts via the inferred
  # concepts manager and enqueues GenerateInferredConcepts jobs for them.
  class GenerateConceptsFromPopularItems < ::Jobs::Scheduled
    every 1.day

    # Does nothing unless the inferred concepts feature is enabled.
    def execute(_args)
      return unless SiteSetting.inferred_concepts_enabled

      process_popular_topics
      process_popular_posts
    end

    private

    # Selects candidate topics using the site-setting thresholds and enqueues them.
    def process_popular_topics
      topics =
        DiscourseAi::InferredConcepts::Manager.new.find_candidate_topics(
          limit: SiteSetting.inferred_concepts_daily_topics_limit || 20,
          min_posts: SiteSetting.inferred_concepts_min_posts || 5,
          min_likes: SiteSetting.inferred_concepts_min_likes || 10,
          min_views: SiteSetting.inferred_concepts_min_views || 100,
          created_after: SiteSetting.inferred_concepts_lookback_days.days.ago,
        )

      enqueue_concept_jobs("topics", topics)
    end

    # Selects candidate posts (first posts excluded) and enqueues them.
    def process_popular_posts
      posts =
        DiscourseAi::InferredConcepts::Manager.new.find_candidate_posts(
          limit: SiteSetting.inferred_concepts_daily_posts_limit || 30,
          min_likes: SiteSetting.inferred_concepts_post_min_likes || 5,
          exclude_first_posts: true,
          created_after: SiteSetting.inferred_concepts_lookback_days.days.ago,
        )

      enqueue_concept_jobs("posts", posts)
    end

    # Enqueues a generation job right away and, when background matching is
    # enabled, a match-only follow-up one hour later for the same ids.
    def enqueue_concept_jobs(item_type, candidates)
      return if candidates.blank?

      ids = candidates.map(&:id)

      Jobs.enqueue(:generate_inferred_concepts, item_type: item_type, item_ids: ids, batch_size: 10)

      return unless SiteSetting.inferred_concepts_background_match

      Jobs.enqueue_in(
        1.hour,
        :generate_inferred_concepts,
        item_type: item_type,
        item_ids: ids,
        batch_size: 10,
        match_only: true,
      )
    end
  end
end

View File

@ -0,0 +1,25 @@
# frozen_string_literal: true
# A named concept label that can be attached to topics and posts through
# the inferred_concept_topics / inferred_concept_posts join models.
class InferredConcept < ActiveRecord::Base
  # Topic side
  has_many :inferred_concept_topics
  has_many :topics, through: :inferred_concept_topics

  # Post side
  has_many :inferred_concept_posts
  has_many :posts, through: :inferred_concept_posts

  # Names are mandatory and must not repeat.
  validates(:name, presence: true, uniqueness: true)
end
# == Schema Information
#
# Table name: inferred_concepts
#
# id :bigint not null, primary key
# name :string not null
# created_at :datetime not null
# updated_at :datetime not null
#
# Indexes
#
# index_inferred_concepts_on_name (name) UNIQUE
#

View File

@ -0,0 +1,25 @@
# frozen_string_literal: true
# Join model linking one inferred concept to one post.
class InferredConceptPost < ActiveRecord::Base
  belongs_to :inferred_concept
  belongs_to :post

  # Both sides of the link are required.
  validates :inferred_concept_id, :post_id, presence: true
  # A concept may be attached to a given post only once.
  validates :inferred_concept_id, uniqueness: { scope: :post_id }
end
# == Schema Information
#
# Table name: inferred_concept_posts
#
# inferred_concept_id :bigint
# post_id :bigint
# created_at :datetime not null
# updated_at :datetime not null
#
# Indexes
#
# index_inferred_concept_posts_on_inferred_concept_id (inferred_concept_id)
# index_inferred_concept_posts_uniqueness (post_id,inferred_concept_id) UNIQUE
#

View File

@ -0,0 +1,25 @@
# frozen_string_literal: true
# Join model linking one inferred concept to one topic.
class InferredConceptTopic < ActiveRecord::Base
  belongs_to :inferred_concept
  belongs_to :topic

  # Both sides of the link are required.
  validates :inferred_concept_id, :topic_id, presence: true
  # A concept may be attached to a given topic only once.
  validates :inferred_concept_id, uniqueness: { scope: :topic_id }
end
# == Schema Information
#
# Table name: inferred_concept_topics
#
# inferred_concept_id :bigint
# topic_id :bigint
# created_at :datetime not null
# updated_at :datetime not null
#
# Indexes
#
# index_inferred_concept_topics_on_inferred_concept_id (inferred_concept_id)
# index_inferred_concept_topics_uniqueness (topic_id,inferred_concept_id) UNIQUE
#

View File

@ -0,0 +1,34 @@
# frozen_string_literal: true
# Serializes a post-like object together with the inferred concepts attached to it.
class AiInferredConceptPostSerializer < ApplicationSerializer
  attributes(
    :id,
    :post_number,
    :topic_id,
    :topic_title,
    :username,
    :avatar_template,
    :created_at,
    :updated_at,
    :excerpt,
    :truncated,
    :inferred_concepts,
  )

  # Avatar template built from the object's username and uploaded avatar id.
  def avatar_template
    username = object.username
    avatar_id = object.uploaded_avatar_id
    User.avatar_template(username, avatar_id)
  end

  # Excerpt of the cooked post body.
  def excerpt
    cooked = object.cooked
    Post.excerpt(cooked)
  end

  # True when the cooked body is longer than the configured excerpt length.
  def truncated
    SiteSetting.post_excerpt_maxlength < object.cooked.length
  end

  # The attached concepts, each rendered with InferredConceptSerializer.
  def inferred_concepts
    concepts = object.inferred_concepts
    ActiveModel::ArraySerializer.new(concepts, each_serializer: InferredConceptSerializer)
  end
end

View File

@ -0,0 +1,5 @@
# frozen_string_literal: true
# Serializes an inferred concept: its id, name and timestamps.
class InferredConceptSerializer < ApplicationSerializer
  attributes(:id, :name, :created_at, :updated_at)
end

View File

@ -22,10 +22,20 @@ export default class AiPersonaResponseFormatEditor extends Component {
type: "string",
},
type: {
type: "string",
enum: ["string", "integer", "boolean", "array"],
},
array_type: {
type: "string",
enum: ["string", "integer", "boolean"],
options: {
dependencies: {
type: "array",
},
},
},
},
required: ["key", "type"],
},
};
@ -41,7 +51,11 @@ export default class AiPersonaResponseFormatEditor extends Component {
const toDisplay = {};
this.args.data.response_format.forEach((keyDesc) => {
toDisplay[keyDesc.key] = keyDesc.type;
if (keyDesc.type === "array") {
toDisplay[keyDesc.key] = `[${keyDesc.array_type}]`;
} else {
toDisplay[keyDesc.key] = keyDesc.type;
}
});
return prettyJSON(toDisplay);

View File

@ -330,6 +330,15 @@ en:
short_summarizer:
name: "Summarizer (short form)"
description: "Default persona used to power AI short summaries for topic lists' items"
concept_finder:
name: "Concept Finder"
description: "AI Bot specialized in identifying concepts and themes in content"
concept_matcher:
name: "Concept Matcher"
description: "AI Bot specialized in matching content against existing concepts"
concept_deduplicator:
name: "Concept Deduplicator"
description: "AI Bot specialized in deduplicating concepts"
topic_not_found: "Summary unavailable, topic not found!"
summarizing: "Summarizing topic"
searching: "Searching for: '%{query}'"
@ -549,6 +558,9 @@ en:
discord_search:
name: "Discord Search"
description: "Adds the ability to search Discord channels"
inferred_concepts:
name: "Inferred Concepts"
description: "Classifies topics and posts into areas of interest / labels."
errors:
quota_exceeded: "You have exceeded the quota for this model. Please try again in %{relative_time}."

View File

@ -417,3 +417,55 @@ discourse_ai:
default: false
client: false
hidden: true
inferred_concepts_enabled:
default: false
client: true
area: "ai-features/inferred_concepts"
inferred_concepts_background_match:
default: false
client: false
area: "ai-features/inferred_concepts"
inferred_concepts_daily_topics_limit:
default: 20
client: false
area: "ai-features/inferred_concepts"
inferred_concepts_min_posts:
default: 5
client: false
area: "ai-features/inferred_concepts"
inferred_concepts_min_likes:
default: 10
client: false
area: "ai-features/inferred_concepts"
inferred_concepts_min_views:
default: 100
client: false
area: "ai-features/inferred_concepts"
inferred_concepts_lookback_days:
default: 30
client: false
area: "ai-features/inferred_concepts"
inferred_concepts_daily_posts_limit:
default: 30
client: false
area: "ai-features/inferred_concepts"
inferred_concepts_post_min_likes:
default: 5
client: false
area: "ai-features/inferred_concepts"
inferred_concepts_generate_persona:
default: "-15"
type: enum
enum: "DiscourseAi::Configuration::PersonaEnumerator"
area: "ai-features/inferred_concepts"
inferred_concepts_match_persona:
default: "-16"
type: enum
enum: "DiscourseAi::Configuration::PersonaEnumerator"
area: "ai-features/inferred_concepts"
inferred_concepts_deduplicate_persona:
default: "-17"
type: enum
enum: "DiscourseAi::Configuration::PersonaEnumerator"
area: "ai-features/inferred_concepts"

View File

@ -72,9 +72,13 @@ DiscourseAi::Personas::Persona.system_personas.each do |persona_class, id|
persona.tools = tools.map { |name, value| [name, value] }
persona.response_format = instance.response_format
# Only set response_format if it's not defined as a method in the persona class
if !instance.class.instance_methods.include?(:response_format)
persona.response_format = instance.response_format
end
persona.examples = instance.examples
# Only set examples if it's not defined as a method in the persona class
persona.examples = instance.examples if !instance.class.instance_methods.include?(:examples)
persona.system_prompt = instance.system_prompt
persona.top_p = instance.top_p

View File

@ -0,0 +1,11 @@
# frozen_string_literal: true
# Creates the inferred_concepts table, which stores one row per concept name.
class CreateInferredConceptsTable < ActiveRecord::Migration[7.2]
def change
create_table :inferred_concepts do |t|
# The concept label itself; required.
t.string :name, null: false
t.timestamps
end
# Concept names must be unique at the database level.
add_index :inferred_concepts, :name, unique: true
end
end

View File

@ -0,0 +1,18 @@
# frozen_string_literal: true
# Creates the join table between inferred concepts and topics.
class CreateInferredConceptTopics < ActiveRecord::Migration[7.0]
def change
# id: false -- the join table has no primary key column of its own.
create_table :inferred_concept_topics, id: false do |t|
# Neither reference column is declared null: false, so both are nullable.
t.bigint :inferred_concept_id
t.bigint :topic_id
t.timestamps
end
# A given (topic, concept) pair may be stored only once.
add_index :inferred_concept_topics,
%i[topic_id inferred_concept_id],
unique: true,
name: "index_inferred_concept_topics_uniqueness"
# Separate index for lookups by concept.
add_index :inferred_concept_topics, :inferred_concept_id
end
end

View File

@ -0,0 +1,18 @@
# frozen_string_literal: true
# Creates the join table between inferred concepts and posts.
class CreateInferredConceptPosts < ActiveRecord::Migration[7.0]
def change
# id: false -- the join table has no primary key column of its own.
create_table :inferred_concept_posts, id: false do |t|
# Neither reference column is declared null: false, so both are nullable.
t.bigint :inferred_concept_id
t.bigint :post_id
t.timestamps
end
# A given (post, concept) pair may be stored only once.
add_index :inferred_concept_posts,
%i[post_id inferred_concept_id],
unique: true,
name: "index_inferred_concept_posts_uniqueness"
# Separate index for lookups by concept.
add_index :inferred_concept_posts, :inferred_concept_id
end
end

View File

@ -9,6 +9,7 @@ module DiscourseAi
@stream_consumer = stream_consumer
@current_key = nil
@current_value = nil
@tracking_array = false
@parser = DiscourseAi::Completions::JsonStreamingParser.new
@parser.key do |k|
@ -16,12 +17,28 @@ module DiscourseAi
@current_value = nil
end
@parser.value do |v|
@parser.value do |value|
if @current_key
stream_consumer.notify_progress(@current_key, v)
@current_key = nil
if @tracking_array
@current_value << value
stream_consumer.notify_progress(@current_key, @current_value)
else
stream_consumer.notify_progress(@current_key, value)
@current_key = nil
end
end
end
@parser.start_array do
@tracking_array = true
@current_value = []
end
@parser.end_array do
@tracking_array = false
@current_key = nil
@current_value = nil
end
end
def broken?
@ -46,8 +63,9 @@ module DiscourseAi
end
if @parser.state == :start_string && @current_key
buffered = @tracking_array ? [@parser.buf] : @parser.buf
# this is worth notifying
stream_consumer.notify_progress(@current_key, @parser.buf)
stream_consumer.notify_progress(@current_key, buffered)
end
@current_key = nil if @parser.state == :end_value

View File

@ -45,7 +45,7 @@ module DiscourseAi
@property_cursors[prop_name] = @tracked[prop_name].length
unread
else
# Ints and bools are always returned as is.
# Ints, bools, and arrays are always returned as is.
@tracked[prop_name]
end
end

View File

@ -7,7 +7,7 @@ module DiscourseAi
@persona =
AiPersona
.all_personas(enabled_only: false)
.find { |persona| persona.id == SiteSetting.ai_discord_search_persona.to_i }
.find { |p| p.id == SiteSetting.ai_discord_search_persona.to_i }
.new
@bot =
DiscourseAi::Personas::Bot.as(

View File

@ -36,6 +36,14 @@ module DiscourseAi
persona_setting_name: "ai_discord_search_persona",
enable_setting_name: "ai_discord_search_enabled",
},
{
id: 5,
name_ref: "inferred_concepts",
name_key: "discourse_ai.features.inferred_concepts.name",
description_key: "discourse_ai.features.inferred_concepts.description",
persona_setting_name: "inferred_concepts_generate_persona",
enable_setting_name: "inferred_concepts_enabled",
},
]
end

View File

@ -0,0 +1,135 @@
# frozen_string_literal: true
module DiscourseAi
  module InferredConcepts
    # Builds the text that is sent for concept analysis and attaches matched
    # concepts to topics and posts.
    class Applier
      # Associates the provided concepts with a topic.
      # Concepts that are already associated are skipped, because the join
      # table has a unique index on the (topic, concept) pair.
      #
      # @param topic [Topic] a Topic instance
      # @param concepts [Enumerable<InferredConcept>] concepts to attach
      # @return the topic's concept association, or nil when there is nothing to do
      def apply_to_topic(topic, concepts)
        return if topic.blank? || concepts.blank?

        topic.inferred_concepts << unapplied(concepts, topic.inferred_concepts)
      end

      # Associates the provided concepts with a post.
      # Concepts that are already associated are skipped, because the join
      # table has a unique index on the (post, concept) pair.
      #
      # @param post [Post] a Post instance
      # @param concepts [Enumerable<InferredConcept>] concepts to attach
      # @return the post's concept association, or nil when there is nothing to do
      def apply_to_post(post, concepts)
        return if post.blank? || concepts.blank?

        post.inferred_concepts << unapplied(concepts, post.inferred_concepts)
      end

      # Extracts content from a topic for concept analysis.
      #
      # @param topic [Topic]
      # @return [String] the title followed by the first ten posts (by post
      #   number), or "" for a blank topic
      def topic_content_for_analysis(topic)
        return "" if topic.blank?

        # Users are preloaded so building the lines does not query once per post.
        posts = Post.where(topic_id: topic.id).includes(:user).order(:post_number).limit(10)

        # A post may have no user record; &. keeps such posts from aborting the analysis.
        lines = posts.map { |p| "#{p.post_number}) #{p.user&.username}: #{p.raw}" }

        "Title: #{topic.title}\n\n#{lines.join("\n\n")}"
      end

      # Extracts content from a post for concept analysis.
      #
      # @param post [Post]
      # @return [String] the topic title (for context) and the raw post, or ""
      #   for a blank post
      def post_content_for_analysis(post)
        return "" if post.blank?

        topic_title = post.topic&.title || ""

        # A post may have no user record; &. avoids a NoMethodError in that case.
        "Topic: #{topic_title}\n\nPost by #{post.user&.username}:\n#{post.raw}"
      end

      # Matches a topic against the existing concepts and applies the matches.
      #
      # @param topic [Topic]
      # @return the matched concepts ([] when the topic is blank or no concepts exist)
      def match_existing_concepts(topic)
        return [] if topic.blank?

        matched_concepts = matched_concepts_for(topic_content_for_analysis(topic))
        apply_to_topic(topic, matched_concepts)
        matched_concepts
      end

      # Matches a post against the existing concepts and applies the matches.
      #
      # @param post [Post]
      # @return the matched concepts ([] when the post is blank or no concepts exist)
      def match_existing_concepts_for_post(post)
        return [] if post.blank?

        matched_concepts = matched_concepts_for(post_content_for_analysis(post))
        apply_to_post(post, matched_concepts)
        matched_concepts
      end

      # Runs the persona configured in `inferred_concepts_match_persona`
      # against the content and reads the `matching_concepts` property from
      # its structured output.
      #
      # @param content [String] text to analyse
      # @param concept_list [Array<String>] concept names offered to the persona
      # @return the value of `matching_concepts`, or [] when there is no input
      #   or no structured output
      def match_concepts_to_content(content, concept_list)
        return [] if content.blank? || concept_list.blank?

        persona =
          AiPersona
            .all_personas(enabled_only: false)
            .find { |p| p.id == SiteSetting.inferred_concepts_match_persona.to_i }
            .new

        # NOTE(review): Finder reads default_llm_id from the persona instance
        # rather than from its class -- confirm which of the two is intended.
        llm = LlmModel.find(persona.class.default_llm_id)

        context =
          DiscourseAi::Personas::BotContext.new(
            messages: [{ type: :user, content: content }],
            user: Discourse.system_user,
            inferred_concepts: concept_list,
          )

        bot = DiscourseAi::Personas::Bot.as(Discourse.system_user, persona: persona, model: llm)

        structured_output = nil
        bot.reply(context) do |partial, _, type|
          structured_output = partial if type == :structured_output
        end

        structured_output&.read_buffered_property(:matching_concepts) || []
      end

      private

      # Concepts from `concepts` that are not yet present in `existing`.
      def unapplied(concepts, existing)
        concepts.to_a - existing.to_a
      end

      # Looks up the InferredConcept records whose names the matcher persona
      # selected for the given content; [] when no concepts exist at all.
      def matched_concepts_for(content)
        existing_concepts = DiscourseAi::InferredConcepts::Manager.new.list_concepts
        return [] if existing_concepts.empty?

        InferredConcept.where(name: match_concepts_to_content(content, existing_concepts))
      end
    end
  end
end

View File

@ -0,0 +1,176 @@
# frozen_string_literal: true
module DiscourseAi
  module InferredConcepts
    # Identifies concepts in content, persists them, selects candidate
    # topics/posts for concept generation and deduplicates concept names.
    class Finder
      # Identifies potential concepts from the provided content using the
      # persona configured in `inferred_concepts_generate_persona`.
      #
      # @param content [String] text to analyse
      # @return the `concepts` property of the structured output, or [] when
      #   the content is blank or no structured output was produced
      def identify_concepts(content)
        return [] if content.blank?

        context =
          DiscourseAi::Personas::BotContext.new(
            messages: [{ type: :user, content: content }],
            user: Discourse.system_user,
            inferred_concepts: DiscourseAi::InferredConcepts::Manager.new.list_concepts,
          )

        structured_property(SiteSetting.inferred_concepts_generate_persona, context, :concepts)
      end

      # Creates or finds concepts in the database from the provided names.
      #
      # @param concept_names [Array<String>]
      # @return [Array<InferredConcept>] one record per provided name
      def create_or_find_concepts(concept_names)
        return [] if concept_names.blank?

        concept_names.map { |name| InferredConcept.find_or_create_by(name: name) }
      end

      # Finds candidate topics to use for concept generation.
      #
      # @param limit [Integer] Maximum number of topics to return
      # @param min_posts [Integer] Minimum topics.posts_count
      # @param min_likes [Integer] Minimum topics.like_count
      # @param min_views [Integer] Minimum topics.views
      # @param exclude_topic_ids [Array<Integer>] Topic IDs to exclude
      # @param category_ids [Array<Integer>] Only include topics from these categories (optional)
      # @param created_after [DateTime] Only include topics created after this time (optional)
      # @return a Topic relation ordered by engagement score, highest first
      def find_candidate_topics(
        limit: 100,
        min_posts: 5,
        min_likes: 10,
        min_views: 100,
        exclude_topic_ids: [],
        category_ids: nil,
        created_after: 30.days.ago
      )
        query =
          Topic.where(
            "topics.posts_count >= ? AND topics.views >= ? AND topics.like_count >= ?",
            min_posts,
            min_views,
            min_likes,
          )

        # Optional filters
        query = query.where("topics.id NOT IN (?)", exclude_topic_ids) if exclude_topic_ids.present?
        query = query.where("topics.category_id IN (?)", category_ids) if category_ids.present?
        query = query.where("topics.created_at >= ?", created_after) if created_after.present?

        # Only topics of the default archetype
        query = query.where(archetype: Archetype.default)

        # Exclude topics that already have concepts. NOT EXISTS is used rather
        # than NOT IN (subquery): topic_id is nullable in the join table, and a
        # single NULL in a NOT IN list makes the predicate reject every row.
        query = query.where(<<~SQL)
          NOT EXISTS (
            SELECT 1
            FROM inferred_concept_topics
            WHERE inferred_concept_topics.topic_id = topics.id
          )
        SQL

        # Score and order topics by engagement (combination of views, likes, and posts)
        query =
          query.select(
            "topics.*,
          (topics.like_count * 2 + topics.posts_count * 3 + topics.views * 0.1) AS engagement_score",
          ).order("engagement_score DESC")

        query.limit(limit)
      end

      # Finds candidate posts to use for concept generation.
      #
      # @param limit [Integer] Maximum number of posts to return
      # @param min_likes [Integer] Minimum posts.like_count
      # @param exclude_first_posts [Boolean] Exclude first posts in topics
      # @param exclude_post_ids [Array<Integer>] Post IDs to exclude
      # @param category_ids [Array<Integer>] Only include posts from topics in these categories
      # @param created_after [DateTime] Only include posts created after this time
      # @return a Post relation ordered by like count, highest first
      def find_candidate_posts(
        limit: 100,
        min_likes: 5,
        exclude_first_posts: true,
        exclude_post_ids: [],
        category_ids: nil,
        created_after: 30.days.ago
      )
        query = Post.where("posts.like_count >= ?", min_likes)

        query = query.where("posts.post_number > 1") if exclude_first_posts

        # Optional filters
        query = query.where("posts.id NOT IN (?)", exclude_post_ids) if exclude_post_ids.present?
        query = query.where("posts.created_at >= ?", created_after) if created_after.present?

        if category_ids.present?
          query = query.joins(:topic).where("topics.category_id IN (?)", category_ids)
        end

        # Exclude posts that already have concepts. NOT EXISTS is used rather
        # than NOT IN (subquery): post_id is nullable in the join table, and a
        # single NULL in a NOT IN list makes the predicate reject every row.
        query = query.where(<<~SQL)
          NOT EXISTS (
            SELECT 1
            FROM inferred_concept_posts
            WHERE inferred_concept_posts.post_id = posts.id
          )
        SQL

        query.order(like_count: :desc).limit(limit)
      end

      # Deduplicates and standardizes a list of concept names using the
      # persona configured in `inferred_concepts_deduplicate_persona`.
      #
      # @param concept_names [Array<String>] List of concept names to deduplicate
      # @return the `streamlined_tags` property of the structured output;
      #   [] when the input is blank or no structured output was produced
      def deduplicate_concepts(concept_names)
        # Always hand back a list: the non-blank path does, and callers
        # concatenate the result onto an array.
        return [] if concept_names.blank?

        context =
          DiscourseAi::Personas::BotContext.new(
            messages: [{ type: :user, content: concept_names.join(", ") }],
            user: Discourse.system_user,
          )

        structured_property(
          SiteSetting.inferred_concepts_deduplicate_persona,
          context,
          :streamlined_tags,
        )
      end

      private

      # Runs the persona with the given id against the context and reads one
      # property from its structured output; [] when none was produced.
      def structured_property(persona_id, context, property)
        persona =
          AiPersona.all_personas(enabled_only: false).find { |p| p.id == persona_id.to_i }.new

        llm = LlmModel.find(persona.default_llm_id)
        bot = DiscourseAi::Personas::Bot.as(Discourse.system_user, persona: persona, model: llm)

        structured_output = nil
        bot.reply(context) do |partial, _, type|
          structured_output = partial if type == :structured_output
        end

        structured_output&.read_buffered_property(property) || []
      end
    end
  end
end

View File

@ -0,0 +1,201 @@
# frozen_string_literal: true
module DiscourseAi
  module InferredConcepts
    # Entry point for the inferred concepts feature. Most methods delegate to
    # Finder (identification, candidate selection, deduplication) or Applier
    # (content extraction, matching).
    class Manager
      # Lists the names of the existing concepts in alphabetical order.
      #
      # @param limit [Integer, nil] Optional maximum number of names to return
      # @return [Array<String>] concept names
      def list_concepts(limit: nil)
        scope = InferredConcept.all.order("name ASC")
        scope = scope.limit(limit) if limit.present?
        scope.pluck(:name)
      end

      # Deduplicates the stored concepts in batches by letter:
      # 1. Group concept names by first letter ("#" for anything not A-Z)
      # 2. Run each group through the deduplicator in slices of `per_letter_batch`
      # 3. Run the combined output through it again in slices of `full_pass_batch`
      # 4. Destroy concepts whose name is not in the final list and insert the
      #    final names
      #
      # Nothing is changed when either pass yields no names: an empty final
      # list would otherwise match every stored concept for destruction.
      #
      # NOTE(review): InferredConcept declares no `dependent:` option on its
      # join associations, so destroying a concept here presumably leaves its
      # join rows behind -- confirm whether that is intended.
      #
      # @param per_letter_batch [Integer] slice size for the per-letter pass
      # @param full_pass_batch [Integer] slice size for the final pass
      # @return the result of the bulk insert, or nil when nothing was changed
      def deduplicate_concepts_by_letter(per_letter_batch: 50, full_pass_batch: 150)
        all_concepts = list_concepts
        return if all_concepts.empty?

        finder = DiscourseAi::InferredConcepts::Finder.new

        letter_groups =
          all_concepts.group_by do |concept|
            first_char = concept[0]&.upcase
            first_char&.match?(/[A-Z]/) ? first_char : "#"
          end

        letter_deduplicated =
          letter_groups.values.flat_map do |concepts|
            deduplicate_in_batches(finder, concepts, per_letter_batch)
          end
        return if letter_deduplicated.blank?

        final_result =
          deduplicate_in_batches(finder, letter_deduplicated, full_pass_batch).uniq.reject(&:blank?)

        # Never wipe the table because the deduplicator produced no output.
        return if final_result.empty?

        # Destroy and insert together so a failure cannot leave a half-applied result.
        InferredConcept.transaction do
          InferredConcept.where.not(name: final_result).destroy_all
          InferredConcept.insert_all(final_result.map { |name| { name: name } })
        end
      end

      # Extracts new concepts from arbitrary content.
      #
      # @param content [String] The content to analyze
      # @return the concept names identified by the finder
      def identify_concepts(content)
        DiscourseAi::InferredConcepts::Finder.new.identify_concepts(content)
      end

      # Identifies concepts in the content and creates or finds them in the
      # database, without applying them to any topic or post.
      #
      # @param content [String] The content to analyze
      # @return [Array<InferredConcept>] The created or found concepts
      def generate_concepts_from_content(content)
        return [] if content.blank?

        finder = DiscourseAi::InferredConcepts::Finder.new
        concept_names = finder.identify_concepts(content)
        return [] if concept_names.blank?

        finder.create_or_find_concepts(concept_names)
      end

      # Generates concepts from a topic's content without applying them to the topic.
      #
      # @param topic [Topic] A Topic instance
      # @return [Array<InferredConcept>] The created or found concepts
      def generate_concepts_from_topic(topic)
        return [] if topic.blank?

        content = DiscourseAi::InferredConcepts::Applier.new.topic_content_for_analysis(topic)
        return [] if content.blank?

        generate_concepts_from_content(content)
      end

      # Generates concepts from a post's content without applying them to the post.
      #
      # @param post [Post] A Post instance
      # @return [Array<InferredConcept>] The created or found concepts
      def generate_concepts_from_post(post)
        return [] if post.blank?

        content = DiscourseAi::InferredConcepts::Applier.new.post_content_for_analysis(post)
        return [] if content.blank?

        generate_concepts_from_content(content)
      end

      # Matches a topic against existing concepts and applies the matches.
      #
      # @param topic [Topic] A Topic instance
      # @return the matched concepts, [] for a blank topic
      def match_topic_to_concepts(topic)
        return [] if topic.blank?

        DiscourseAi::InferredConcepts::Applier.new.match_existing_concepts(topic)
      end

      # Matches a post against existing concepts and applies the matches.
      #
      # @param post [Post] A Post instance
      # @return the matched concepts, [] for a blank post
      def match_post_to_concepts(post)
        return [] if post.blank?

        DiscourseAi::InferredConcepts::Applier.new.match_existing_concepts_for_post(post)
      end

      # Finds topics that have a specific concept.
      #
      # @param concept_name [String] The name of the concept to search for
      # @return the concept's topics, or [] when no such concept exists
      def search_topics_by_concept(concept_name)
        concept = ::InferredConcept.find_by(name: concept_name)
        concept ? concept.topics : []
      end

      # Finds posts that have a specific concept.
      #
      # @param concept_name [String] The name of the concept to search for
      # @return the concept's posts, or [] when no such concept exists
      def search_posts_by_concept(concept_name)
        concept = ::InferredConcept.find_by(name: concept_name)
        concept ? concept.posts : []
      end

      # Matches arbitrary content against the existing concepts without
      # applying anything.
      #
      # @param content [String] The content to analyze
      # @return the names of matching concepts, [] when no concepts exist
      def match_content_to_concepts(content)
        existing_concepts = InferredConcept.all.pluck(:name)
        return [] if existing_concepts.empty?

        DiscourseAi::InferredConcepts::Applier.new.match_concepts_to_content(
          content,
          existing_concepts,
        )
      end

      # Finds candidate topics that are good for concept generation.
      #
      # @param opts [Hash] Options forwarded as keyword arguments to the finder
      # @option opts [Integer] :limit (100) Maximum number of topics to return
      # @option opts [Integer] :min_posts (5) Minimum number of posts in topic
      # @option opts [Integer] :min_likes (10) Minimum number of likes
      # @option opts [Integer] :min_views (100) Minimum number of views
      # @option opts [Array<Integer>] :exclude_topic_ids ([]) Topic IDs to exclude
      # @option opts [Array<Integer>] :category_ids (nil) Only include topics from these categories
      # @option opts [DateTime] :created_after (30.days.ago) Only include topics created after this time
      # @return whatever Finder#find_candidate_topics returns
      def find_candidate_topics(opts = {})
        DiscourseAi::InferredConcepts::Finder.new.find_candidate_topics(**opts)
      end

      # Finds candidate posts that are good for concept generation.
      #
      # @param opts [Hash] Options forwarded as keyword arguments to the finder
      # @return whatever Finder#find_candidate_posts returns
      def find_candidate_posts(opts = {})
        DiscourseAi::InferredConcepts::Finder.new.find_candidate_posts(**opts)
      end

      private

      # Runs the names through the deduplicator slice by slice and returns the
      # combined output.
      def deduplicate_in_batches(finder, names, batch_size)
        names.each_slice(batch_size).flat_map { |batch| finder.deduplicate_concepts(batch) }
      end
    end
  end
end

View File

@ -152,10 +152,12 @@ module DiscourseAi
raw_context << partial
current_thinking << partial
end
elsif partial.is_a?(DiscourseAi::Completions::StructuredOutput)
update_blk.call(partial, nil, :structured_output)
else
update_blk.call(partial)
elsif update_blk.present?
if partial.is_a?(DiscourseAi::Completions::StructuredOutput)
update_blk.call(partial, nil, :structured_output)
else
update_blk.call(partial)
end
end
end
end
@ -316,7 +318,13 @@ module DiscourseAi
response_format
.to_a
.reduce({}) do |memo, format|
memo[format["key"].to_sym] = { type: format["type"] }
type_desc = { type: format["type"] }
if format["type"] == "array"
type_desc[:items] = { type: format["array_type"] || "string" }
end
memo[format["key"].to_sym] = type_desc
memo
end

View File

@ -17,7 +17,8 @@ module DiscourseAi
:context_post_ids,
:feature_name,
:resource_url,
:cancel_manager
:cancel_manager,
:inferred_concepts
def initialize(
post: nil,
@ -35,7 +36,8 @@ module DiscourseAi
context_post_ids: nil,
feature_name: "bot",
resource_url: nil,
cancel_manager: nil
cancel_manager: nil,
inferred_concepts: []
)
@participants = participants
@user = user
@ -54,7 +56,7 @@ module DiscourseAi
@resource_url = resource_url
@feature_name = feature_name
@resource_url = resource_url
@inferred_concepts = inferred_concepts
@cancel_manager = cancel_manager
@ -68,7 +70,15 @@ module DiscourseAi
end
# these are strings that can be safely interpolated into templates
TEMPLATE_PARAMS = %w[time site_url site_title site_description participants resource_url]
TEMPLATE_PARAMS = %w[
time
site_url
site_title
site_description
participants
resource_url
inferred_concepts
]
def lookup_template_param(key)
public_send(key.to_sym) if TEMPLATE_PARAMS.include?(key)
@ -114,6 +124,7 @@ module DiscourseAi
skip_tool_details: @skip_tool_details,
feature_name: @feature_name,
resource_url: @resource_url,
inferred_concepts: @inferred_concepts,
}
end
end

View File

@ -0,0 +1,53 @@
# frozen_string_literal: true
module DiscourseAi
  module Personas
    # Persona whose system prompt asks the model to merge a list of machine-generated
    # tags into a shorter, streamlined list returned under the "streamlined_tags" key.
    class ConceptDeduplicator < Persona
      class << self
        # This persona reports itself as not enabled by default.
        def default_enabled
          false
        end
      end

      # Static instructions for the model; the heredoc is stripped of surrounding whitespace.
      # @return [String]
      def system_prompt
        <<~PROMPT.strip
          You will be given a list of machine-generated tags.
          Your task is to streamline this list by merging entries who are similar or related.
          Please follow these steps to create a streamlined list of tags:
          1. Review the entire list of tags carefully.
          2. Identify and remove any exact duplicates.
          3. Look for tags that are too specific or niche, and consider removing them or replacing them with more general terms.
          4. If there are multiple tags that convey similar concepts, choose the best one and remove the others, or add a new one that covers the missing aspect.
          5. Ensure that the remaining tags are relevant and useful for describing the content.
          When deciding which tags are "best", consider the following criteria:
          - Relevance: How well does the tag describe the core content or theme?
          - Generality: Is the tag specific enough to be useful, but not so specific that it's unlikely to be searched for?
          - Clarity: Is the tag easy to understand and free from ambiguity?
          - Popularity: Would this tag likely be used by people searching for this type of content?
          Example Input:
          AI Bias, AI Bots, AI Ethics, AI Helper, AI Integration, AI Moderation, AI Search, AI-Driven Moderation, AI-Generated Post Illustrations, AJAX Events, AJAX Requests, AMA Events, API, API Access, API Authentication, API Automation, API Call, API Changes, API Compliance, API Configuration, API Costs, API Documentation, API Endpoint, API Endpoints, API Functions, API Integration, API Key, API Keys, API Limitation, API Limitations, API Permissions, API Rate Limiting, API Request, API Request Optimization, API Requests, API Security, API Suspension, API Token, API Tokens, API Translation, API Versioning, API configuration, API endpoint, API key, APIs, APK, APT Package Manager, ARIA, ARIA Tags, ARM Architecture, ARM-based, AWS, AWS Lightsail, AWS RDS, AWS S3, AWS Translate, AWS costs, AWS t2.micro, Abbreviation Expansion, Abbreviations
          Example Output:
          AI, AJAX, API, APK, APT Package Manager, ARIA, ARM Architecture, AWS, Abbreviations
          Please provide your streamlined list of tags within <streamlined_tags> key.
          Remember, the goal is to create a more focused and effective set of tags while maintaining the essence of the original list.
          Your output should be in the following format:
          <o>
          {
          "streamlined_tags": ["tag1", "tag3"]
          }
          </o>
        PROMPT
      end

      # Structured-output declaration: one key, "streamlined_tags", typed as an array of strings.
      # @return [Array<Hash>]
      def response_format
        [
          {
            "key" => "streamlined_tags",
            "type" => "array",
            "array_type" => "string",
          },
        ]
      end
    end
  end
end

View File

@ -0,0 +1,49 @@
# frozen_string_literal: true
module DiscourseAi
  module Personas
    # Persona whose system prompt asks the model to extract concept labels from content
    # and return them under the "concepts" key.
    class ConceptFinder < Persona
      class << self
        # This persona reports itself as not enabled by default.
        def default_enabled
          false
        end
      end

      # Builds the prompt. Up to 100 concept names are read through
      # InferredConcepts::Manager#list_concepts; when any exist they are listed in the
      # prompt so the model can reuse them, otherwise that section is left empty.
      # @return [String]
      def system_prompt
        known_concepts = DiscourseAi::InferredConcepts::Manager.new.list_concepts(limit: 100)

        existing_concepts_text =
          if known_concepts.present?
            <<~CONCEPTS
              The following concepts already exist in the system:
              #{known_concepts.join(", ")}
              You can reuse these existing concepts if they apply to the content, or suggest new concepts.
            CONCEPTS
          else
            ""
          end

        <<~PROMPT.strip
          You are an advanced concept tagging system that identifies key concepts, themes, and topics from provided text.
          Your job is to extract meaningful labels that can be used to categorize content.
          Guidelines for generating concepts:
          - Extract up to 7 concepts from the provided content
          - Concepts should be single words or short phrases (1-3 words maximum)
          - Focus on substantive topics, themes, technologies, methodologies, or domains
          - Avoid overly general terms like "discussion" or "question"
          - Ensure concepts are relevant to the core content
          - Do not include proper nouns unless they represent key technologies or methodologies
          - Maintain the original language of the text being analyzed
          #{existing_concepts_text}
          Format your response as a JSON object with a single key named "concepts", which has an array of concept strings as the value.
          Your output should be in the following format:
          <o>
          {"concepts": ["concept1", "concept2", "concept3"]}
          </o>
          Where the concepts are replaced by the actual concepts you've identified.
        PROMPT
      end

      # Structured-output declaration: one key, "concepts", typed as an array of strings.
      # @return [Array<Hash>]
      def response_format
        [
          {
            "key" => "concepts",
            "type" => "array",
            "array_type" => "string",
          },
        ]
      end
    end
  end
end

View File

@ -0,0 +1,43 @@
# frozen_string_literal: true
module DiscourseAi
  module Personas
    # Persona whose system prompt asks the model to pick, from a supplied list, the
    # concepts that apply to a piece of content, returned under "matching_concepts".
    class ConceptMatcher < Persona
      class << self
        # This persona reports itself as not enabled by default.
        def default_enabled
          false
        end
      end

      # Static instructions for the model. The literal `{inferred_concepts}` token is a
      # placeholder left in the text (it is not Ruby interpolation).
      # @return [String]
      def system_prompt
        <<~PROMPT.strip
          You are an advanced concept matching system that determines which concepts from a provided list are relevant to a piece of content.
          Your job is to analyze the content and determine which concepts from the list apply to it.
          Guidelines for matching concepts:
          - Only select concepts that are clearly relevant to the content
          - The content must substantially discuss or relate to the concept
          - Superficial mentions are not enough to consider a concept relevant
          - Be precise and selective - don't match concepts that are only tangentially related
          - Consider both explicit mentions and implicit discussions of concepts
          - Maintain the original language of the text being analyzed
          - IMPORTANT: Only select from the exact concepts in the provided list - do not add new concepts
          - If no concepts from the list match the content, return an empty array
          The list of available concepts is:
          {inferred_concepts}
          Format your response as a JSON object with a single key named "matching_concepts", which has an array of concept strings from the provided list.
          Your output should be in the following format:
          <o>
          {"matching_concepts": ["concept1", "concept3", "concept5"]}
          </o>
          Only include concepts from the provided list that match the content. If no concepts match, return an empty array.
        PROMPT
      end

      # Structured-output declaration: one key, "matching_concepts", typed as an array of strings.
      # @return [Array<Hash>]
      def response_format
        [
          {
            "key" => "matching_concepts",
            "type" => "array",
            "array_type" => "string",
          },
        ]
      end
    end
  end
end

View File

@ -52,6 +52,9 @@ module DiscourseAi
ShortSummarizer => -12,
Designer => -13,
ForumResearcher => -14,
ConceptFinder => -15,
ConceptMatcher => -16,
ConceptDeduplicator => -17,
}
end

View File

@ -11,6 +11,9 @@ module DiscourseAi
-> { where(classification_type: "sentiment") },
class_name: "ClassificationResult",
as: :target
has_many :inferred_concept_posts
has_many :inferred_concepts, through: :inferred_concept_posts
end
end
end

View File

@ -11,6 +11,9 @@ module DiscourseAi
-> { where(summary_type: AiSummary.summary_types[:gist]) },
class_name: "AiSummary",
as: :target
has_many :inferred_concept_topics
has_many :inferred_concepts, through: :inferred_concept_topics
end
end
end

View File

@ -0,0 +1,2 @@
# frozen_string_literal: true
# Fabricator for inferred concepts; each record gets a sequential name ("concept_<n>").
Fabricator(:inferred_concept) do
  name { sequence(:name) { |n| "concept_#{n}" } }
end

View File

@ -0,0 +1,167 @@
# frozen_string_literal: true
RSpec.describe Jobs::GenerateInferredConcepts do
fab!(:topic)
fab!(:post)
fab!(:concept) { Fabricate(:inferred_concept, name: "programming") }
before { SiteSetting.inferred_concepts_enabled = true }
describe "#execute" do
it "does nothing with blank item_ids" do
expect_any_instance_of(DiscourseAi::InferredConcepts::Manager).not_to receive(
:match_topic_to_concepts,
)
subject.execute(item_type: "topics", item_ids: [])
subject.execute(item_type: "topics", item_ids: nil)
end
it "does nothing with blank item_type" do
expect_any_instance_of(DiscourseAi::InferredConcepts::Manager).not_to receive(
:match_topic_to_concepts,
)
subject.execute(item_type: "", item_ids: [topic.id])
subject.execute(item_type: nil, item_ids: [topic.id])
end
it "validates item_type to be topics or posts" do
  allow(Rails.logger).to receive(:error).with(/Invalid item_type/)

  subject.execute(item_type: "invalid", item_ids: [1])

  # The stub alone asserted nothing; verify that the invalid type was actually logged.
  expect(Rails.logger).to have_received(:error).with(/Invalid item_type/)
end
context "with topics" do
it "processes topics in match_only mode" do
expect_any_instance_of(DiscourseAi::InferredConcepts::Manager).to receive(
:match_topic_to_concepts,
).with(topic)
subject.execute(item_type: "topics", item_ids: [topic.id], match_only: true)
end
it "processes topics in generation mode" do
expect_any_instance_of(DiscourseAi::InferredConcepts::Manager).to receive(
:generate_concepts_from_topic,
).with(topic)
subject.execute(item_type: "topics", item_ids: [topic.id], match_only: false)
end
it "handles topics that don't exist" do
# Non-existent IDs should be silently skipped (no error expected)
expect_any_instance_of(DiscourseAi::InferredConcepts::Manager).not_to receive(
:match_topic_to_concepts,
)
subject.execute(
item_type: "topics",
item_ids: [999_999], # non-existent ID
match_only: true,
)
end
it "processes multiple topics" do
  topic2 = Fabricate(:topic)
  manager_instance = instance_double(DiscourseAi::InferredConcepts::Manager)
  allow(DiscourseAi::InferredConcepts::Manager).to receive(:new).and_return(manager_instance)
  allow(manager_instance).to receive(:match_topic_to_concepts).with(topic)
  allow(manager_instance).to receive(:match_topic_to_concepts).with(topic2)

  subject.execute(item_type: "topics", item_ids: [topic.id, topic2.id], match_only: true)

  # Stubbing with `allow` does not verify anything; assert that each topic was matched.
  expect(manager_instance).to have_received(:match_topic_to_concepts).with(topic)
  expect(manager_instance).to have_received(:match_topic_to_concepts).with(topic2)
end
it "processes topics in batches" do
# Five topic ids are passed with batch_size: 3, so two lookups (three ids, then two) are anticipated.
# NOTE(review): both stubs below use `allow`, and the example has no expectation, so it
# passes even if Topic.where is never called. Consider asserting with `have_received`
# after `execute` once the lookup call used by the job is confirmed.
topics = Array.new(5) { Fabricate(:topic) }
topic_ids = topics.map(&:id)
# Should process in batches of 3
allow(Topic).to receive(:where).with(id: topic_ids[0..2]).and_call_original
allow(Topic).to receive(:where).with(id: topic_ids[3..4]).and_call_original
subject.execute(item_type: "topics", item_ids: topic_ids, batch_size: 3, match_only: true)
end
end
context "with posts" do
it "processes posts in match_only mode" do
expect_any_instance_of(DiscourseAi::InferredConcepts::Manager).to receive(
:match_post_to_concepts,
).with(post)
subject.execute(item_type: "posts", item_ids: [post.id], match_only: true)
end
it "processes posts in generation mode" do
expect_any_instance_of(DiscourseAi::InferredConcepts::Manager).to receive(
:generate_concepts_from_post,
).with(post)
subject.execute(item_type: "posts", item_ids: [post.id], match_only: false)
end
it "handles posts that don't exist" do
# Non-existent IDs should be silently skipped (no error expected)
expect_any_instance_of(DiscourseAi::InferredConcepts::Manager).not_to receive(
:match_post_to_concepts,
)
subject.execute(
item_type: "posts",
item_ids: [999_999], # non-existent ID
match_only: true,
)
end
it "processes multiple posts" do
  post2 = Fabricate(:post)
  manager_instance = instance_double(DiscourseAi::InferredConcepts::Manager)
  allow(DiscourseAi::InferredConcepts::Manager).to receive(:new).and_return(manager_instance)
  allow(manager_instance).to receive(:match_post_to_concepts).with(post)
  allow(manager_instance).to receive(:match_post_to_concepts).with(post2)

  subject.execute(item_type: "posts", item_ids: [post.id, post2.id], match_only: true)

  # Stubbing with `allow` does not verify anything; assert that each post was matched.
  expect(manager_instance).to have_received(:match_post_to_concepts).with(post)
  expect(manager_instance).to have_received(:match_post_to_concepts).with(post2)
end
end
it "handles exceptions during processing" do
  allow_any_instance_of(DiscourseAi::InferredConcepts::Manager).to receive(
    :match_topic_to_concepts,
  ).and_raise(StandardError.new("Test error"))
  allow(Rails.logger).to receive(:error).with(
    /Error generating concepts from topic #{topic.id}/,
  )

  subject.execute(item_type: "topics", item_ids: [topic.id], match_only: true)

  # The logger was only stubbed before; verify the failure is reported rather than lost.
  expect(Rails.logger).to have_received(:error).with(
    /Error generating concepts from topic #{topic.id}/,
  )
end
it "uses default batch size of 100" do
# 150 topic ids are passed without batch_size, so lookups of 100 and 50 ids are anticipated.
# NOTE(review): the stubs use `allow` and nothing is asserted afterwards, so the default
# batch size is not actually verified. Fabricating 150 topics also makes this example
# comparatively expensive — confirm whether real records are needed here.
topics = Array.new(150) { Fabricate(:topic) }
topic_ids = topics.map(&:id)
# Should process in batches of 100
allow(Topic).to receive(:where).with(id: topic_ids[0..99]).and_call_original
allow(Topic).to receive(:where).with(id: topic_ids[100..149]).and_call_original
subject.execute(item_type: "topics", item_ids: topic_ids, match_only: true)
end
it "respects custom batch size" do
# Five topic ids with batch_size: 2, so lookups of two, two and one id are anticipated.
# NOTE(review): only `allow` stubs are set up and there is no expectation, so this example
# cannot fail on a wrong batch size unless Topic.where is called with other arguments.
topics = Array.new(5) { Fabricate(:topic) }
topic_ids = topics.map(&:id)
# Should process in batches of 2
allow(Topic).to receive(:where).with(id: topic_ids[0..1]).and_call_original
allow(Topic).to receive(:where).with(id: topic_ids[2..3]).and_call_original
allow(Topic).to receive(:where).with(id: topic_ids[4..4]).and_call_original
subject.execute(item_type: "topics", item_ids: topic_ids, batch_size: 2, match_only: true)
end
end
end

View File

@ -0,0 +1,259 @@
# frozen_string_literal: true
RSpec.describe Jobs::GenerateConceptsFromPopularItems do
fab!(:topic) { Fabricate(:topic, posts_count: 6, views: 150, like_count: 12) }
fab!(:post) { Fabricate(:post, like_count: 8, post_number: 2) }
before do
SiteSetting.inferred_concepts_enabled = true
SiteSetting.inferred_concepts_daily_topics_limit = 20
SiteSetting.inferred_concepts_daily_posts_limit = 30
SiteSetting.inferred_concepts_min_posts = 5
SiteSetting.inferred_concepts_min_likes = 10
SiteSetting.inferred_concepts_min_views = 100
SiteSetting.inferred_concepts_post_min_likes = 5
SiteSetting.inferred_concepts_lookback_days = 30
SiteSetting.inferred_concepts_background_match = false
end
describe "#execute" do
it "does nothing when inferred_concepts_enabled is false" do
  SiteSetting.inferred_concepts_enabled = false
  expect_any_instance_of(DiscourseAi::InferredConcepts::Manager).not_to receive(
    :find_candidate_topics,
  )
  expect_any_instance_of(DiscourseAi::InferredConcepts::Manager).not_to receive(
    :find_candidate_posts,
  )
  allow(Jobs).to receive(:enqueue)

  subject.execute({})

  # `allow` only stubs; explicitly verify that no job was scheduled.
  expect(Jobs).not_to have_received(:enqueue)
end
it "processes popular topics when enabled" do
  candidate_topics = [topic]
  topic_job_args = { item_type: "topics", item_ids: [topic.id], batch_size: 10 }

  freeze_time do
    expect_any_instance_of(DiscourseAi::InferredConcepts::Manager).to receive(
      :find_candidate_topics,
    ).with(
      limit: 20,
      min_posts: 5,
      min_likes: 10,
      min_views: 100,
      created_after: 30.days.ago,
    ).and_return(candidate_topics)
    allow(Jobs).to receive(:enqueue).with(:generate_inferred_concepts, **topic_job_args)
    expect_any_instance_of(DiscourseAi::InferredConcepts::Manager).to receive(
      :find_candidate_posts,
    ).and_return([])

    subject.execute({})

    # The stub alone would pass even if nothing was enqueued; assert the generation job.
    expect(Jobs).to have_received(:enqueue).with(:generate_inferred_concepts, **topic_job_args)
  end
end
it "processes popular posts when enabled" do
  candidate_posts = [post]
  post_job_args = { item_type: "posts", item_ids: [post.id], batch_size: 10 }

  freeze_time do
    expect_any_instance_of(DiscourseAi::InferredConcepts::Manager).to receive(
      :find_candidate_topics,
    ).and_return([])
    expect_any_instance_of(DiscourseAi::InferredConcepts::Manager).to receive(
      :find_candidate_posts,
    ).with(
      limit: 30,
      min_likes: 5,
      exclude_first_posts: true,
      created_after: 30.days.ago,
    ).and_return(candidate_posts)
    allow(Jobs).to receive(:enqueue).with(:generate_inferred_concepts, **post_job_args)

    subject.execute({})

    # The stub alone would pass even if nothing was enqueued; assert the generation job.
    expect(Jobs).to have_received(:enqueue).with(:generate_inferred_concepts, **post_job_args)
  end
end
it "schedules background matching jobs when enabled" do
  SiteSetting.inferred_concepts_background_match = true
  candidate_topics = [topic]
  candidate_posts = [post]
  topic_job_args = { item_type: "topics", item_ids: [topic.id], batch_size: 10 }
  post_job_args = { item_type: "posts", item_ids: [post.id], batch_size: 10 }

  expect_any_instance_of(DiscourseAi::InferredConcepts::Manager).to receive(
    :find_candidate_topics,
  ).and_return(candidate_topics)
  expect_any_instance_of(DiscourseAi::InferredConcepts::Manager).to receive(
    :find_candidate_posts,
  ).and_return(candidate_posts)

  # Stub the generation jobs
  allow(Jobs).to receive(:enqueue).with(:generate_inferred_concepts, **topic_job_args)
  allow(Jobs).to receive(:enqueue).with(:generate_inferred_concepts, **post_job_args)
  # Stub the delayed background matching jobs
  allow(Jobs).to receive(:enqueue_in).with(
    1.hour,
    :generate_inferred_concepts,
    **topic_job_args,
    match_only: true,
  )
  allow(Jobs).to receive(:enqueue_in).with(
    1.hour,
    :generate_inferred_concepts,
    **post_job_args,
    match_only: true,
  )

  subject.execute({})

  # Stubs by themselves verify nothing: assert that every job was really scheduled.
  expect(Jobs).to have_received(:enqueue).with(:generate_inferred_concepts, **topic_job_args)
  expect(Jobs).to have_received(:enqueue).with(:generate_inferred_concepts, **post_job_args)
  expect(Jobs).to have_received(:enqueue_in).with(
    1.hour,
    :generate_inferred_concepts,
    **topic_job_args,
    match_only: true,
  )
  expect(Jobs).to have_received(:enqueue_in).with(
    1.hour,
    :generate_inferred_concepts,
    **post_job_args,
    match_only: true,
  )
end
it "does not schedule jobs when no candidates found" do
  expect_any_instance_of(DiscourseAi::InferredConcepts::Manager).to receive(
    :find_candidate_topics,
  ).and_return([])
  expect_any_instance_of(DiscourseAi::InferredConcepts::Manager).to receive(
    :find_candidate_posts,
  ).and_return([])
  allow(Jobs).to receive(:enqueue)
  allow(Jobs).to receive(:enqueue_in)

  subject.execute({})

  # `allow` permits the calls; the point of this example is that none are made.
  expect(Jobs).not_to have_received(:enqueue)
  expect(Jobs).not_to have_received(:enqueue_in)
end
it "uses site setting values for topic filtering" do
SiteSetting.inferred_concepts_daily_topics_limit = 50
SiteSetting.inferred_concepts_min_posts = 8
SiteSetting.inferred_concepts_min_likes = 15
SiteSetting.inferred_concepts_min_views = 200
SiteSetting.inferred_concepts_lookback_days = 45
freeze_time do
expect_any_instance_of(DiscourseAi::InferredConcepts::Manager).to receive(
:find_candidate_topics,
).with(
limit: 50,
min_posts: 8,
min_likes: 15,
min_views: 200,
created_after: 45.days.ago,
).and_return([])
expect_any_instance_of(DiscourseAi::InferredConcepts::Manager).to receive(
:find_candidate_posts,
).and_return([])
subject.execute({})
end
end
it "uses site setting values for post filtering" do
SiteSetting.inferred_concepts_daily_posts_limit = 40
SiteSetting.inferred_concepts_post_min_likes = 8
SiteSetting.inferred_concepts_lookback_days = 45
freeze_time do
expect_any_instance_of(DiscourseAi::InferredConcepts::Manager).to receive(
:find_candidate_topics,
).and_return([])
expect_any_instance_of(DiscourseAi::InferredConcepts::Manager).to receive(
:find_candidate_posts,
).with(
limit: 40,
min_likes: 8,
exclude_first_posts: true,
created_after: 45.days.ago,
).and_return([])
subject.execute({})
end
end
it "handles nil site setting values gracefully" do
SiteSetting.inferred_concepts_daily_topics_limit = nil
SiteSetting.inferred_concepts_daily_posts_limit = nil
SiteSetting.inferred_concepts_min_posts = nil
SiteSetting.inferred_concepts_min_likes = nil
SiteSetting.inferred_concepts_min_views = nil
SiteSetting.inferred_concepts_post_min_likes = nil
# Keep lookback_days at default so .days.ago doesn't fail
freeze_time do
expect_any_instance_of(DiscourseAi::InferredConcepts::Manager).to receive(
:find_candidate_topics,
).with(
limit: 0, # nil becomes 0
min_posts: 0, # nil becomes 0
min_likes: 0, # nil becomes 0
min_views: 0, # nil becomes 0
created_after: 30.days.ago, # default from before block
).and_return([])
expect_any_instance_of(DiscourseAi::InferredConcepts::Manager).to receive(
:find_candidate_posts,
).with(
limit: 0, # nil becomes 0
min_likes: 0, # nil becomes 0
exclude_first_posts: true,
created_after: 30.days.ago, # default from before block
).and_return([])
subject.execute({})
end
end
it "processes both topics and posts in the same run" do
  candidate_topics = [topic]
  candidate_posts = [post]
  expect_any_instance_of(DiscourseAi::InferredConcepts::Manager).to receive(
    :find_candidate_topics,
  ).and_return(candidate_topics)
  expect_any_instance_of(DiscourseAi::InferredConcepts::Manager).to receive(
    :find_candidate_posts,
  ).and_return(candidate_posts)
  allow(Jobs).to receive(:enqueue)

  subject.execute({})

  # A count on `allow` does not enforce a minimum; assert one enqueue per item type.
  expect(Jobs).to have_received(:enqueue).twice
end
end
context "when scheduling the job" do
it "is scheduled to run daily" do
expect(described_class.every).to eq(1.day)
end
end
end

View File

@ -672,5 +672,87 @@ RSpec.describe DiscourseAi::Completions::Endpoints::AwsBedrock do
expect(structured_output.read_buffered_property(:key)).to eq("Hello!\n There")
end
end
it "works with JSON schema array types" do
schema = {
type: "json_schema",
json_schema: {
name: "reply",
schema: {
type: "object",
properties: {
plain: {
type: "string",
},
key: {
type: "array",
items: {
type: "string",
},
},
},
required: %w[plain key],
additionalProperties: false,
},
strict: true,
},
}
messages =
[
{ type: "message_start", message: { usage: { input_tokens: 9 } } },
{ type: "content_block_delta", delta: { text: "\"" } },
{ type: "content_block_delta", delta: { text: "key" } },
{ type: "content_block_delta", delta: { text: "\":" } },
{ type: "content_block_delta", delta: { text: " [\"" } },
{ type: "content_block_delta", delta: { text: "Hello!" } },
{ type: "content_block_delta", delta: { text: " I am" } },
{ type: "content_block_delta", delta: { text: " a " } },
{ type: "content_block_delta", delta: { text: "chunk\"," } },
{ type: "content_block_delta", delta: { text: "\"There" } },
{ type: "content_block_delta", delta: { text: "\"]," } },
{ type: "content_block_delta", delta: { text: " \"plain" } },
{ type: "content_block_delta", delta: { text: "\":\"" } },
{ type: "content_block_delta", delta: { text: "I'm here" } },
{ type: "content_block_delta", delta: { text: " too\"}" } },
{ type: "message_delta", delta: { usage: { output_tokens: 25 } } },
].map { |message| encode_message(message) }
proxy = DiscourseAi::Completions::Llm.proxy("custom:#{model.id}")
request = nil
bedrock_mock.with_chunk_array_support do
stub_request(
:post,
"https://bedrock-runtime.us-east-1.amazonaws.com/model/anthropic.claude-3-sonnet-20240229-v1:0/invoke-with-response-stream",
)
.with do |inner_request|
request = inner_request
true
end
.to_return(status: 200, body: messages)
structured_output = nil
proxy.generate("hello world", response_format: schema, user: user) do |partial|
structured_output = partial
end
expected = {
"max_tokens" => 4096,
"anthropic_version" => "bedrock-2023-05-31",
"messages" => [
{ "role" => "user", "content" => "hello world" },
{ "role" => "assistant", "content" => "{" },
],
"system" => "You are a helpful bot",
}
expect(JSON.parse(request.body)).to eq(expected)
expect(structured_output.read_buffered_property(:key)).to contain_exactly(
"Hello! I am a chunk",
"There",
)
expect(structured_output.read_buffered_property(:plain)).to eq("I'm here too")
end
end
end
end

View File

@ -16,6 +16,12 @@ RSpec.describe DiscourseAi::Completions::StructuredOutput do
status: {
type: "string",
},
list: {
type: "array",
items: {
type: "string",
},
},
},
)
end
@ -64,6 +70,48 @@ RSpec.describe DiscourseAi::Completions::StructuredOutput do
# No partial string left to read.
expect(structured_output.read_buffered_property(:status)).to eq("")
end
it "supports array types" do
  # Streams the JSON text `{ "list": ["Hello! I am a chunk","There"]}` fragment by fragment
  # and checks what the buffered :list property exposes after each step.
  push = ->(*fragments) { fragments.each { |fragment| structured_output << fragment } }
  buffered_list = -> { structured_output.read_buffered_property(:list) }

  push.call(+"{ \"", +"list", +"\":")
  expect(buffered_list.call).to eq(nil)

  push.call(+" [\"")
  expect(buffered_list.call).to eq([""])

  push.call(+"Hello!")
  expect(buffered_list.call).to eq(["Hello!"])

  push.call(+" I am", +" a ", +"chunk\",")
  expect(buffered_list.call).to eq(["Hello! I am a chunk"])

  push.call(+"\"There\"")
  expect(buffered_list.call).to eq(["Hello! I am a chunk", "There"])

  push.call(+"]}")
  expect(buffered_list.call).to eq(["Hello! I am a chunk", "There"])
end
end
describe "dealing with non-JSON responses" do

View File

@ -0,0 +1,320 @@
# frozen_string_literal: true
RSpec.describe DiscourseAi::InferredConcepts::Applier do
subject(:applier) { described_class.new }
fab!(:topic) { Fabricate(:topic, title: "Ruby Programming Tutorial") }
fab!(:post) { Fabricate(:post, raw: "This post is about advanced testing techniques") }
fab!(:user) { Fabricate(:user, username: "dev_user") }
fab!(:concept1) { Fabricate(:inferred_concept, name: "programming") }
fab!(:concept2) { Fabricate(:inferred_concept, name: "testing") }
fab!(:llm_model) { Fabricate(:fake_model) }
before do
SiteSetting.inferred_concepts_match_persona = -1
SiteSetting.inferred_concepts_enabled = true
# Set up the post's user
post.update!(user: user)
end
describe "#apply_to_topic" do
it "does nothing for blank topic or concepts" do
expect { applier.apply_to_topic(nil, [concept1]) }.not_to raise_error
expect { applier.apply_to_topic(topic, []) }.not_to raise_error
expect { applier.apply_to_topic(topic, nil) }.not_to raise_error
end
it "associates concepts with topic" do
applier.apply_to_topic(topic, [concept1, concept2])
expect(topic.inferred_concepts).to include(concept1, concept2)
expect(concept1.topics).to include(topic)
expect(concept2.topics).to include(topic)
end
end
describe "#apply_to_post" do
it "does nothing for blank post or concepts" do
expect { applier.apply_to_post(nil, [concept1]) }.not_to raise_error
expect { applier.apply_to_post(post, []) }.not_to raise_error
expect { applier.apply_to_post(post, nil) }.not_to raise_error
end
it "associates concepts with post" do
applier.apply_to_post(post, [concept1, concept2])
expect(post.inferred_concepts).to include(concept1, concept2)
expect(concept1.posts).to include(post)
expect(concept2.posts).to include(post)
end
end
describe "#topic_content_for_analysis" do
it "returns empty string for blank topic" do
expect(applier.topic_content_for_analysis(nil)).to eq("")
end
it "extracts title and posts content" do
# Create additional posts for the topic
post1 = Fabricate(:post, topic: topic, post_number: 1, raw: "First post content", user: user)
post2 = Fabricate(:post, topic: topic, post_number: 2, raw: "Second post content", user: user)
content = applier.topic_content_for_analysis(topic)
expect(content).to include(topic.title)
expect(content).to include("First post content")
expect(content).to include("Second post content")
expect(content).to include(user.username)
expect(content).to include("1)")
expect(content).to include("2)")
end
it "limits to first 10 posts" do
# NOTE(review): the only assertion here is that Post.where was called with the topic id.
# The `limit(10)` stub is an `allow`, so the 10-post cap named in the description is not
# verified; consider asserting on the returned content instead — confirm against Applier.
# Create 12 posts for the topic
12.times { |i| Fabricate(:post, topic: topic, post_number: i + 1, user: user) }
allow(Post).to receive(:where).with(topic_id: topic.id).and_call_original
allow_any_instance_of(ActiveRecord::Relation).to receive(:limit).with(10).and_call_original
applier.topic_content_for_analysis(topic)
expect(Post).to have_received(:where).with(topic_id: topic.id)
end
end
describe "#post_content_for_analysis" do
it "returns empty string for blank post" do
expect(applier.post_content_for_analysis(nil)).to eq("")
end
it "extracts post content with topic context" do
content = applier.post_content_for_analysis(post)
expect(content).to include(post.topic.title)
expect(content).to include(post.raw)
expect(content).to include(post.user.username)
expect(content).to include("Topic:")
expect(content).to include("Post by")
end
it "handles post without topic" do
# Mock the post to return nil for topic
allow(post).to receive(:topic).and_return(nil)
content = applier.post_content_for_analysis(post)
expect(content).to include(post.raw)
expect(content).to include(post.user.username)
expect(content).to include("Topic: ")
end
end
describe "#match_existing_concepts" do
let(:manager) { instance_double(DiscourseAi::InferredConcepts::Manager) }
before do
allow(DiscourseAi::InferredConcepts::Manager).to receive(:new).and_return(manager)
allow(manager).to receive(:list_concepts).and_return(%w[programming testing ruby])
end
it "returns empty array for blank topic" do
expect(applier.match_existing_concepts(nil)).to eq([])
end
it "returns empty array when no existing concepts" do
allow(manager).to receive(:list_concepts).and_return([])
result = applier.match_existing_concepts(topic)
expect(result).to eq([])
end
it "matches concepts and applies them to topic" do
# Test the real implementation without stubbing internal methods
allow(InferredConcept).to receive(:where).with(name: ["programming"]).and_return([concept1])
# Mock the LLM interaction
persona_instance_double = instance_spy("DiscourseAi::Personas::Persona")
bot_double = instance_spy(DiscourseAi::Personas::Bot)
structured_output_double = instance_double("DiscourseAi::Completions::StructuredOutput")
persona_class_double = double("PersonaClass") # rubocop:disable RSpec/VerifiedDoubles
allow(AiPersona).to receive(:all_personas).and_return([persona_class_double])
allow(persona_class_double).to receive(:id).and_return(SiteSetting.inferred_concepts_match_persona.to_i)
allow(persona_class_double).to receive(:new).and_return(persona_instance_double)
allow(persona_class_double).to receive(:default_llm_id).and_return(llm_model.id)
allow(persona_instance_double).to receive(:class).and_return(persona_class_double)
allow(LlmModel).to receive(:find).and_return(llm_model)
allow(DiscourseAi::Personas::Bot).to receive(:as).and_return(bot_double)
allow(bot_double).to receive(:reply).and_yield(
structured_output_double,
nil,
:structured_output,
)
allow(structured_output_double).to receive(:read_buffered_property).with(
:matching_concepts,
).and_return(["programming"])
result = applier.match_existing_concepts(topic)
expect(result).to eq([concept1])
end
end
describe "#match_existing_concepts_for_post" do
let(:manager) { instance_double(DiscourseAi::InferredConcepts::Manager) }
before do
allow(DiscourseAi::InferredConcepts::Manager).to receive(:new).and_return(manager)
allow(manager).to receive(:list_concepts).and_return(%w[programming testing ruby])
end
it "returns empty array for blank post" do
expect(applier.match_existing_concepts_for_post(nil)).to eq([])
end
it "returns empty array when no existing concepts" do
allow(manager).to receive(:list_concepts).and_return([])
result = applier.match_existing_concepts_for_post(post)
expect(result).to eq([])
end
it "matches concepts and applies them to post" do
# Test the real implementation without stubbing internal methods
allow(InferredConcept).to receive(:where).with(name: ["testing"]).and_return([concept2])
# Mock the LLM interaction
persona_instance_double = instance_spy("DiscourseAi::Personas::Persona")
bot_double = instance_spy(DiscourseAi::Personas::Bot)
structured_output_double = instance_double("DiscourseAi::Completions::StructuredOutput")
persona_class_double = double("PersonaClass") # rubocop:disable RSpec/VerifiedDoubles
allow(AiPersona).to receive(:all_personas).and_return([persona_class_double])
allow(persona_class_double).to receive(:id).and_return(SiteSetting.inferred_concepts_match_persona.to_i)
allow(persona_class_double).to receive(:new).and_return(persona_instance_double)
allow(persona_class_double).to receive(:default_llm_id).and_return(llm_model.id)
allow(persona_instance_double).to receive(:class).and_return(persona_class_double)
allow(LlmModel).to receive(:find).and_return(llm_model)
allow(DiscourseAi::Personas::Bot).to receive(:as).and_return(bot_double)
allow(bot_double).to receive(:reply).and_yield(
structured_output_double,
nil,
:structured_output,
)
allow(structured_output_double).to receive(:read_buffered_property).with(
:matching_concepts,
).and_return(["testing"])
result = applier.match_existing_concepts_for_post(post)
expect(result).to eq([concept2])
end
end
describe "#match_concepts_to_content" do
it "returns empty array for blank content or concept list" do
expect(applier.match_concepts_to_content("", ["concept1"])).to eq([])
expect(applier.match_concepts_to_content(nil, ["concept1"])).to eq([])
expect(applier.match_concepts_to_content("content", [])).to eq([])
expect(applier.match_concepts_to_content("content", nil)).to eq([])
end
it "uses ConceptMatcher persona to match concepts" do
content = "This is about Ruby programming"
concept_list = %w[programming testing ruby]
structured_output_double = instance_double("DiscourseAi::Completions::StructuredOutput")
persona_class_double = double("PersonaClass") # rubocop:disable RSpec/VerifiedDoubles
persona_instance_double = instance_spy("DiscourseAi::Personas::Persona")
bot_double = instance_spy(DiscourseAi::Personas::Bot)
allow(AiPersona).to receive(:all_personas).and_return([persona_class_double])
allow(persona_class_double).to receive(:id).and_return(SiteSetting.inferred_concepts_match_persona.to_i)
allow(persona_class_double).to receive(:new).and_return(persona_instance_double)
allow(persona_class_double).to receive(:default_llm_id).and_return(llm_model.id)
allow(persona_instance_double).to receive(:class).and_return(persona_class_double)
allow(LlmModel).to receive(:find).and_return(llm_model)
allow(DiscourseAi::Personas::Bot).to receive(:as).and_return(bot_double)
allow(bot_double).to receive(:reply).and_yield(
structured_output_double,
nil,
:structured_output,
)
allow(structured_output_double).to receive(:read_buffered_property).with(
:matching_concepts,
).and_return(%w[programming ruby])
result = applier.match_concepts_to_content(content, concept_list)
expect(result).to eq(%w[programming ruby])
expect(bot_double).to have_received(:reply)
expect(structured_output_double).to have_received(:read_buffered_property).with(
:matching_concepts,
)
end
it "handles no structured output gracefully" do
content = "Test content"
concept_list = ["concept1"]
persona_class_double = double("PersonaClass") # rubocop:disable RSpec/VerifiedDoubles
persona_instance_double = instance_double("DiscourseAi::Personas::Persona")
bot_double = instance_double("DiscourseAi::Personas::Bot")
allow(AiPersona).to receive(:all_personas).and_return([persona_class_double])
allow(persona_class_double).to receive(:id).and_return(SiteSetting.inferred_concepts_match_persona.to_i)
allow(persona_class_double).to receive(:new).and_return(persona_instance_double)
allow(persona_class_double).to receive(:default_llm_id).and_return(llm_model.id)
allow(persona_instance_double).to receive(:class).and_return(persona_class_double)
allow(LlmModel).to receive(:find).and_return(llm_model)
allow(DiscourseAi::Personas::Bot).to receive(:as).and_return(bot_double)
allow(bot_double).to receive(:reply).and_yield(nil, nil, :text)
result = applier.match_concepts_to_content(content, concept_list)
expect(result).to eq([])
end
it "returns empty array when no matching concepts found" do
content = "This is about something else"
concept_list = %w[programming testing]
expected_response = [['{"matching_concepts": []}']]
persona_class_double = double("PersonaClass") # rubocop:disable RSpec/VerifiedDoubles
persona_instance_double = instance_double("DiscourseAi::Personas::Persona")
bot_double = instance_double("DiscourseAi::Personas::Bot")
allow(AiPersona).to receive(:all_personas).and_return([persona_class_double])
allow(persona_class_double).to receive(:id).and_return(SiteSetting.inferred_concepts_match_persona.to_i)
allow(persona_class_double).to receive(:new).and_return(persona_instance_double)
allow(persona_class_double).to receive(:default_llm_id).and_return(llm_model.id)
allow(persona_instance_double).to receive(:class).and_return(persona_class_double)
allow(LlmModel).to receive(:find).and_return(llm_model)
allow(DiscourseAi::Personas::Bot).to receive(:as).and_return(bot_double)
allow(bot_double).to receive(:reply).and_return(expected_response)
result = applier.match_concepts_to_content(content, concept_list)
expect(result).to eq([])
end
it "handles missing matching_concepts key in response" do
content = "Test content"
concept_list = ["concept1"]
expected_response = [['{"other_key": ["value"]}']]
persona_class_double = double("PersonaClass") # rubocop:disable RSpec/VerifiedDoubles
persona_instance_double = instance_double("DiscourseAi::Personas::Persona")
bot_double = instance_double("DiscourseAi::Personas::Bot")
allow(AiPersona).to receive(:all_personas).and_return([persona_class_double])
allow(persona_class_double).to receive(:id).and_return(SiteSetting.inferred_concepts_match_persona.to_i)
allow(persona_class_double).to receive(:new).and_return(persona_instance_double)
allow(persona_class_double).to receive(:default_llm_id).and_return(llm_model.id)
allow(persona_instance_double).to receive(:class).and_return(persona_class_double)
allow(LlmModel).to receive(:find).and_return(llm_model)
allow(DiscourseAi::Personas::Bot).to receive(:as).and_return(bot_double)
allow(bot_double).to receive(:reply).and_return(expected_response)
result = applier.match_concepts_to_content(content, concept_list)
expect(result).to eq([])
end
end
end

View File

@ -0,0 +1,269 @@
# frozen_string_literal: true
RSpec.describe DiscourseAi::InferredConcepts::Finder do
# Finder instance exercised by the examples below.
subject(:finder) { described_class.new }
# Shared records: one topic and one post with engagement counters set,
# two named concepts, and a fake LLM model.
fab!(:topic) { Fabricate(:topic, posts_count: 5, views: 200, like_count: 15) }
fab!(:post) { Fabricate(:post, like_count: 10) }
fab!(:concept1) { Fabricate(:inferred_concept, name: "programming") }
fab!(:concept2) { Fabricate(:inferred_concept, name: "testing") }
fab!(:llm_model) { Fabricate(:fake_model) }
# Point the generate/deduplicate persona settings at id -1 and switch the
# feature on before every example.
before do
SiteSetting.inferred_concepts_generate_persona = -1
SiteSetting.inferred_concepts_deduplicate_persona = -1
SiteSetting.inferred_concepts_enabled = true
end
describe "#identify_concepts" do
  # Stubs everything between the finder and a bot: the persona class whose id
  # matches the generate-persona setting, its instance, the LLM lookup and
  # Bot.as. Returns the bot double so each example can script #reply.
  def stub_generate_persona_bot
    persona_klass = double("PersonaClass") # rubocop:disable RSpec/VerifiedDoubles
    persona = double("PersonaInstance") # rubocop:disable RSpec/VerifiedDoubles
    bot = instance_double("DiscourseAi::Personas::Bot")

    allow(AiPersona).to receive(:all_personas).and_return([persona_klass])
    allow(persona_klass).to receive_messages(
      id: SiteSetting.inferred_concepts_generate_persona.to_i,
      new: persona,
    )
    allow(persona).to receive(:default_llm_id).and_return(llm_model.id)
    allow(LlmModel).to receive(:find).with(llm_model.id).and_return(llm_model)
    allow(DiscourseAi::Personas::Bot).to receive(:as).and_return(bot)

    bot
  end

  it "returns empty array for blank content" do
    ["", nil].each { |blank| expect(finder.identify_concepts(blank)).to eq([]) }
  end

  it "uses ConceptFinder persona to identify concepts" do
    identified = %w[ruby programming testing]
    bot = stub_generate_persona_bot

    # The bot yields a structured-output chunk whose :concepts property holds
    # the identified names.
    structured_output = instance_double("DiscourseAi::Completions::StructuredOutput")
    allow(structured_output).to receive(:read_buffered_property).with(:concepts).and_return(
      identified,
    )
    allow(bot).to receive(:reply).and_yield(structured_output, nil, :structured_output)

    expect(finder.identify_concepts("This is about Ruby programming and testing")).to eq(
      %w[ruby programming testing],
    )
  end

  it "handles no structured output gracefully" do
    bot = stub_generate_persona_bot
    # Only a plain-text chunk with a nil payload is yielded.
    allow(bot).to receive(:reply).and_yield(nil, nil, :text)

    expect(finder.identify_concepts("Test content")).to eq([])
  end
end
describe "#create_or_find_concepts" do
  it "returns empty array for blank concept names" do
    [[], nil].each { |blank| expect(finder.create_or_find_concepts(blank)).to eq([]) }
  end

  it "creates new concepts for new names" do
    fresh_names = %w[new_concept1 new_concept2]

    concepts = finder.create_or_find_concepts(fresh_names)

    expect(concepts.length).to eq(2)
    expect(concepts.map(&:name)).to match_array(fresh_names)
    # Both names must now be persisted.
    expect(InferredConcept.where(name: fresh_names).count).to eq(2)
  end

  it "finds existing concepts" do
    # Both names belong to the concepts fabricated for the whole spec.
    concepts = finder.create_or_find_concepts(%w[programming testing])

    expect(concepts.length).to eq(2)
    expect(concepts).to include(concept1, concept2)
  end

  it "handles mix of new and existing concepts" do
    mixed_names = %w[programming new_concept]

    concepts = finder.create_or_find_concepts(mixed_names)

    expect(concepts.length).to eq(2)
    expect(concepts.map(&:name)).to match_array(mixed_names)
  end
end
describe "#find_candidate_topics" do
  # Counters above the thresholds passed in the first example.
  let!(:good_topic) { Fabricate(:topic, posts_count: 6, views: 150, like_count: 12) }
  # Counters below the thresholds passed in the first example.
  let!(:bad_topic) { Fabricate(:topic, posts_count: 2, views: 50, like_count: 2) }
  # High counters, but a concept is already attached.
  let!(:topic_with_concepts) do
    Fabricate(:topic, posts_count: 8, views: 200, like_count: 20).tap do |tagged|
      tagged.inferred_concepts << concept1
    end
  end

  it "finds topics meeting minimum criteria" do
    found = finder.find_candidate_topics(min_posts: 5, min_views: 100, min_likes: 10)

    expect(found).to include(good_topic)
    expect(found).not_to include(bad_topic)
    expect(found).not_to include(topic_with_concepts) # already has concepts
  end

  it "respects limit parameter" do
    found = finder.find_candidate_topics(limit: 1)

    expect(found.length).to be <= 1
  end

  it "excludes specified topic IDs" do
    found = finder.find_candidate_topics(exclude_topic_ids: [good_topic.id])

    expect(found).not_to include(good_topic)
  end

  it "filters by category IDs when provided" do
    category = Fabricate(:category)
    categorized =
      Fabricate(:topic, category: category, posts_count: 6, views: 150, like_count: 12)

    found = finder.find_candidate_topics(category_ids: [category.id])

    expect(found).to include(categorized)
    expect(found).not_to include(good_topic)
  end

  it "filters by creation date" do
    stale =
      Fabricate(:topic, posts_count: 6, views: 150, like_count: 12, created_at: 45.days.ago)

    found = finder.find_candidate_topics(created_after: 30.days.ago)

    expect(found).to include(good_topic)
    expect(found).not_to include(stale)
  end
end
describe "#find_candidate_posts" do
  # Reply with enough likes for the min_likes: 5 examples.
  let!(:good_post) { Fabricate(:post, like_count: 8, post_number: 2) }
  # Reply with too few likes for the min_likes: 5 examples.
  let!(:bad_post) { Fabricate(:post, like_count: 2, post_number: 2) }
  # Well-liked post with post_number 1.
  let!(:first_post) { Fabricate(:post, like_count: 10, post_number: 1) }
  # Well-liked reply that already has a concept attached.
  let!(:post_with_concepts) do
    Fabricate(:post, like_count: 15, post_number: 3).tap do |tagged|
      tagged.inferred_concepts << concept1
    end
  end

  it "finds posts meeting minimum criteria" do
    found = finder.find_candidate_posts(min_likes: 5)

    expect(found).to include(good_post)
    expect(found).not_to include(bad_post)
    expect(found).not_to include(post_with_concepts) # already has concepts
  end

  it "excludes first posts by default" do
    found = finder.find_candidate_posts(min_likes: 5)

    expect(found).not_to include(first_post)
  end

  it "can include first posts when specified" do
    found = finder.find_candidate_posts(min_likes: 5, exclude_first_posts: false)

    expect(found).to include(first_post)
  end

  it "respects limit parameter" do
    found = finder.find_candidate_posts(limit: 1)

    expect(found.length).to be <= 1
  end

  it "excludes specified post IDs" do
    found = finder.find_candidate_posts(exclude_post_ids: [good_post.id])

    expect(found).not_to include(good_post)
  end

  it "filters by category IDs when provided" do
    category = Fabricate(:category)
    categorized_topic = Fabricate(:topic, category: category)
    categorized_post = Fabricate(:post, topic: categorized_topic, like_count: 8, post_number: 2)

    found = finder.find_candidate_posts(category_ids: [category.id])

    expect(found).to include(categorized_post)
    expect(found).not_to include(good_post)
  end

  it "filters by creation date" do
    stale = Fabricate(:post, like_count: 8, post_number: 2, created_at: 45.days.ago)

    found = finder.find_candidate_posts(created_after: 30.days.ago)

    expect(found).to include(good_post)
    expect(found).not_to include(stale)
  end
end
describe "#deduplicate_concepts" do
  # Stubs the persona class whose id matches the deduplicate-persona setting,
  # its instance, the LLM lookup and Bot.as. Returns the bot double so each
  # example can script #reply.
  def stub_deduplicate_persona_bot
    persona_klass = double("PersonaClass") # rubocop:disable RSpec/VerifiedDoubles
    persona = double("PersonaInstance") # rubocop:disable RSpec/VerifiedDoubles
    bot = instance_double("DiscourseAi::Personas::Bot")

    allow(AiPersona).to receive(:all_personas).and_return([persona_klass])
    allow(persona_klass).to receive_messages(
      id: SiteSetting.inferred_concepts_deduplicate_persona.to_i,
      new: persona,
    )
    allow(persona).to receive(:default_llm_id).and_return(llm_model.id)
    allow(LlmModel).to receive(:find).with(llm_model.id).and_return(llm_model)
    allow(DiscourseAi::Personas::Bot).to receive(:as).and_return(bot)

    bot
  end

  it "returns empty result for blank concept names" do
    empty_result = { deduplicated_concepts: [], mapping: {} }

    [[], nil].each { |blank| expect(finder.deduplicate_concepts(blank)).to eq(empty_result) }
  end

  it "uses ConceptDeduplicator persona to deduplicate concepts" do
    raw_names = ["ruby", "Ruby programming", "testing", "unit testing"]
    bot = stub_deduplicate_persona_bot

    # The bot yields a structured-output chunk whose :streamlined_tags
    # property holds the reduced list.
    structured_output = instance_double("DiscourseAi::Completions::StructuredOutput")
    allow(structured_output).to receive(:read_buffered_property).with(
      :streamlined_tags,
    ).and_return(%w[ruby testing])
    allow(bot).to receive(:reply).and_yield(structured_output, nil, :structured_output)

    expect(finder.deduplicate_concepts(raw_names)).to eq(%w[ruby testing])
  end

  it "handles no structured output gracefully" do
    bot = stub_deduplicate_persona_bot
    # Only a plain-text chunk with a nil payload is yielded.
    allow(bot).to receive(:reply).and_yield(nil, nil, :text)

    expect(finder.deduplicate_concepts(%w[concept1 concept2])).to eq([])
  end
end
end

View File

@ -0,0 +1,239 @@
# frozen_string_literal: true
RSpec.describe DiscourseAi::InferredConcepts::Manager do
# Manager instance exercised by the examples below.
subject(:manager) { described_class.new }
# Shared records: a default topic and post, plus two named concepts.
fab!(:topic)
fab!(:post)
fab!(:concept1) { Fabricate(:inferred_concept, name: "programming") }
fab!(:concept2) { Fabricate(:inferred_concept, name: "testing") }
describe "#list_concepts" do
  it "returns all concepts sorted by name" do
    names = manager.list_concepts

    expect(names).to include("programming", "testing")
    # The list must already be in sorted order.
    expect(names).to eq(names.sort)
  end

  it "respects limit parameter" do
    limited = manager.list_concepts(limit: 1)

    expect(limited.length).to eq(1)
  end

  it "returns empty array when no concepts exist" do
    InferredConcept.destroy_all

    expect(manager.list_concepts).to eq([])
  end
end
describe "#generate_concepts_from_content" do
  before do
    SiteSetting.inferred_concepts_generate_persona = -1
    SiteSetting.inferred_concepts_enabled = true
  end

  it "returns empty array for blank content" do
    ["", nil].each { |blank| expect(manager.generate_concepts_from_content(blank)).to eq([]) }
  end

  it "delegates to Finder#identify_concepts" do
    text = "This is about Ruby programming"
    finder_double = instance_double(DiscourseAi::InferredConcepts::Finder)

    # Every Finder built during the example is replaced by the double.
    allow(DiscourseAi::InferredConcepts::Finder).to receive(:new).and_return(finder_double)
    allow(finder_double).to receive(:identify_concepts).with(text).and_return(
      %w[ruby programming],
    )
    allow(finder_double).to receive(:create_or_find_concepts).with(
      %w[ruby programming],
    ).and_return([concept1])

    expect(manager.generate_concepts_from_content(text)).to eq([concept1])
  end
end
describe "#generate_concepts_from_topic" do
  it "returns empty array for blank topic" do
    expect(manager.generate_concepts_from_topic(nil)).to eq([])
  end

  it "extracts content and generates concepts" do
    applier_double = instance_double(DiscourseAi::InferredConcepts::Applier)
    finder_double = instance_double(DiscourseAi::InferredConcepts::Finder)

    # The applier double supplies the text extracted from the topic.
    allow(DiscourseAi::InferredConcepts::Applier).to receive(:new).and_return(applier_double)
    allow(applier_double).to receive(:topic_content_for_analysis).with(topic).and_return(
      "topic content",
    )

    # Mock the finder instead of stubbing subject
    allow(DiscourseAi::InferredConcepts::Finder).to receive(:new).and_return(finder_double)
    allow(finder_double).to receive(:identify_concepts).with("topic content").and_return(
      %w[programming],
    )
    allow(finder_double).to receive(:create_or_find_concepts).with(%w[programming]).and_return(
      [concept1],
    )

    expect(manager.generate_concepts_from_topic(topic)).to eq([concept1])
  end
end
describe "#generate_concepts_from_post" do
  it "returns empty array for blank post" do
    expect(manager.generate_concepts_from_post(nil)).to eq([])
  end

  it "extracts content and generates concepts" do
    applier_double = instance_double(DiscourseAi::InferredConcepts::Applier)
    finder_double = instance_double(DiscourseAi::InferredConcepts::Finder)

    # The applier double supplies the text extracted from the post.
    allow(DiscourseAi::InferredConcepts::Applier).to receive(:new).and_return(applier_double)
    allow(applier_double).to receive(:post_content_for_analysis).with(post).and_return(
      "post content",
    )

    # Mock the finder instead of stubbing subject
    allow(DiscourseAi::InferredConcepts::Finder).to receive(:new).and_return(finder_double)
    allow(finder_double).to receive(:identify_concepts).with("post content").and_return(
      %w[testing],
    )
    allow(finder_double).to receive(:create_or_find_concepts).with(%w[testing]).and_return(
      [concept1],
    )

    expect(manager.generate_concepts_from_post(post)).to eq([concept1])
  end
end
describe "#match_topic_to_concepts" do
  it "returns empty array for blank topic" do
    expect(manager.match_topic_to_concepts(nil)).to eq([])
  end

  it "delegates to Applier#match_existing_concepts" do
    applier_double = instance_double(DiscourseAi::InferredConcepts::Applier)

    # Every Applier built during the example is replaced by the double.
    allow(DiscourseAi::InferredConcepts::Applier).to receive(:new).and_return(applier_double)
    allow(applier_double).to receive(:match_existing_concepts).with(topic).and_return(
      [concept1],
    )

    expect(manager.match_topic_to_concepts(topic)).to eq([concept1])
  end
end
describe "#match_post_to_concepts" do
  it "returns empty array for blank post" do
    expect(manager.match_post_to_concepts(nil)).to eq([])
  end

  it "delegates to Applier#match_existing_concepts_for_post" do
    applier_double = instance_double(DiscourseAi::InferredConcepts::Applier)

    # Every Applier built during the example is replaced by the double.
    allow(DiscourseAi::InferredConcepts::Applier).to receive(:new).and_return(applier_double)
    allow(applier_double).to receive(:match_existing_concepts_for_post).with(post).and_return(
      [concept1],
    )

    expect(manager.match_post_to_concepts(post)).to eq([concept1])
  end
end
describe "#search_topics_by_concept" do
  it "returns empty array for non-existent concept" do
    expect(manager.search_topics_by_concept("nonexistent")).to eq([])
  end

  it "returns topics associated with concept" do
    # concept1 is the "programming" concept; attach the shared topic to it.
    concept1.topics << topic

    expect(manager.search_topics_by_concept("programming")).to include(topic)
  end
end
describe "#search_posts_by_concept" do
  it "returns empty array for non-existent concept" do
    expect(manager.search_posts_by_concept("nonexistent")).to eq([])
  end

  it "returns posts associated with concept" do
    # concept1 is the "programming" concept; attach the shared post to it.
    concept1.posts << post

    expect(manager.search_posts_by_concept("programming")).to include(post)
  end
end
describe "#match_content_to_concepts" do
  it "returns empty array when no concepts exist" do
    InferredConcept.destroy_all

    expect(manager.match_content_to_concepts("some content")).to eq([])
  end

  it "delegates to Applier#match_concepts_to_content" do
    text = "programming content"
    known_names = %w[programming testing]
    relation_double = instance_double(ActiveRecord::Relation)
    applier_double = instance_double(DiscourseAi::InferredConcepts::Applier)

    # InferredConcept.all.pluck(:name) is stubbed to return the known names.
    allow(InferredConcept).to receive(:all).and_return(relation_double)
    allow(relation_double).to receive(:pluck).with(:name).and_return(known_names)

    allow(DiscourseAi::InferredConcepts::Applier).to receive(:new).and_return(applier_double)
    allow(applier_double).to receive(:match_concepts_to_content).with(
      text,
      known_names,
    ).and_return(["programming"])

    expect(manager.match_content_to_concepts(text)).to eq(["programming"])
  end
end
describe "#find_candidate_topics" do
  it "delegates to Finder#find_candidate_topics with options" do
    options = { limit: 50, min_posts: 3 }
    finder_double = instance_double(DiscourseAi::InferredConcepts::Finder)

    allow(DiscourseAi::InferredConcepts::Finder).to receive(:new).and_return(finder_double)
    # The finder is expected to receive the hash entries as keyword arguments.
    allow(finder_double).to receive(:find_candidate_topics).with(**options).and_return([topic])

    expect(manager.find_candidate_topics(options)).to eq([topic])
  end
end
describe "#find_candidate_posts" do
  it "delegates to Finder#find_candidate_posts with options" do
    options = { limit: 25, min_likes: 2 }
    finder_double = instance_double(DiscourseAi::InferredConcepts::Finder)

    allow(DiscourseAi::InferredConcepts::Finder).to receive(:new).and_return(finder_double)
    # The finder is expected to receive the hash entries as keyword arguments.
    allow(finder_double).to receive(:find_candidate_posts).with(**options).and_return([post])

    expect(manager.find_candidate_posts(options)).to eq([post])
  end
end
describe "#deduplicate_concepts_by_letter" do
  before do
    # Create test concepts; several of them share a first letter (a, b, c).
    %w[apple application banana berry cat car dog].each do |name|
      Fabricate(:inferred_concept, name: name)
    end
  end

  it "groups concepts by first letter and deduplicates" do
    finder = instance_double(DiscourseAi::InferredConcepts::Finder)
    allow(DiscourseAi::InferredConcepts::Finder).to receive(:new).and_return(finder)
    allow(finder).to receive(:deduplicate_concepts).and_return(%w[apple banana cat dog])
    allow(InferredConcept).to receive(:where).and_call_original
    allow(InferredConcept).to receive(:insert_all).and_call_original

    manager.deduplicate_concepts_by_letter

    # A call-count constraint chained onto `allow` is never verified, so the
    # delegation is asserted explicitly here; without this the example would
    # pass even if the finder were never consulted.
    expect(finder).to have_received(:deduplicate_concepts).at_least(:once)
  end

  it "handles empty concept list" do
    InferredConcept.destroy_all

    expect { manager.deduplicate_concepts_by_letter }.not_to raise_error
  end
end
end

View File

@ -0,0 +1,32 @@
# frozen_string_literal: true
RSpec.describe DiscourseAi::Personas::ConceptDeduplicator do
  let(:persona) { described_class.new }

  describe ".default_enabled" do
    it "is disabled by default" do
      expect(described_class.default_enabled).to eq(false)
    end
  end

  describe "#system_prompt" do
    let(:prompt) { persona.system_prompt }

    it "specifies output format" do
      # Every fragment of the documented output envelope must be present.
      expect(prompt).to include(
        "<streamlined_tags>",
        "<o>",
        '"streamlined_tags": ["tag1", "tag3"]',
        "</o>",
      )
    end
  end

  describe "#response_format" do
    it "defines correct response format" do
      # A single array-of-strings property named "streamlined_tags".
      expected_format = [
        { "array_type" => "string", "key" => "streamlined_tags", "type" => "array" },
      ]

      expect(persona.response_format).to eq(expected_format)
    end
  end
end

View File

@ -0,0 +1,63 @@
# frozen_string_literal: true
RSpec.describe DiscourseAi::Personas::ConceptFinder do
  let(:persona) { described_class.new }

  describe ".default_enabled" do
    it "is disabled by default" do
      expect(described_class.default_enabled).to eq(false)
    end
  end

  describe "#system_prompt" do
    before do
      Fabricate(:inferred_concept, name: "programming")
      Fabricate(:inferred_concept, name: "testing")
      Fabricate(:inferred_concept, name: "ruby")
    end

    it "includes existing concepts when available" do
      prompt = persona.system_prompt

      # Assert on the fabricated names directly so the example cannot pass
      # vacuously when the concepts table happens to be empty.
      expect(prompt).to include("programming", "testing", "ruby")
    end

    it "handles empty existing concepts" do
      InferredConcept.destroy_all

      prompt = persona.system_prompt

      expect(prompt).not_to include("following concepts already exist")
      expect(prompt).to include("advanced concept tagging system")
    end

    it "limits existing concepts to 100" do
      manager = instance_double(DiscourseAi::InferredConcepts::Manager)
      allow(DiscourseAi::InferredConcepts::Manager).to receive(:new).and_return(manager)
      allow(manager).to receive(:list_concepts).and_return(%w[concept1 concept2])

      persona.system_prompt

      # Verify the call actually happened and carried the limit; a bare
      # `allow` would let the example pass even if the manager were unused.
      expect(manager).to have_received(:list_concepts).with(limit: 100)
    end

    it "includes format instructions" do
      prompt = persona.system_prompt

      expect(prompt).to include("<o>")
      expect(prompt).to include('{"concepts": ["concept1", "concept2", "concept3"]}')
      expect(prompt).to include("</o>")
    end

    it "includes language preservation instruction" do
      prompt = persona.system_prompt

      expect(prompt).to include("original language of the text")
    end
  end

  describe "#response_format" do
    it "defines correct response format" do
      format = persona.response_format

      # A single array-of-strings property named "concepts".
      expect(format).to eq([{ "array_type" => "string", "key" => "concepts", "type" => "array" }])
    end
  end
end

View File

@ -0,0 +1,36 @@
# frozen_string_literal: true
RSpec.describe DiscourseAi::Personas::ConceptMatcher do
  let(:persona) { described_class.new }

  describe ".default_enabled" do
    it "is disabled by default" do
      expect(described_class.default_enabled).to eq(false)
    end
  end

  describe "#system_prompt" do
    let(:prompt) { persona.system_prompt }

    it "includes placeholder for concept list" do
      # The literal placeholder text must appear in the prompt.
      expect(prompt).to include("{inferred_concepts}")
    end

    it "specifies output format" do
      # Every fragment of the documented output envelope must be present.
      expect(prompt).to include(
        "matching_concepts",
        "<o>",
        '{"matching_concepts": ["concept1", "concept3", "concept5"]}',
        "</o>",
      )
    end
  end

  describe "#response_format" do
    it "defines correct response format" do
      # A single array-of-strings property named "matching_concepts".
      expected_format = [
        { "array_type" => "string", "key" => "matching_concepts", "type" => "array" },
      ]

      expect(persona.response_format).to eq(expected_format)
    end
  end
end

View File

@ -17,6 +17,7 @@ class TestPersona < DiscourseAi::Personas::Persona
{participants}
{time}
{resource_url}
{inferred_concepts}
PROMPT
end
end
@ -38,6 +39,7 @@ RSpec.describe DiscourseAi::Personas::Persona do
end
let(:resource_url) { "https://path-to-resource" }
let(:inferred_concepts) { %w[bulbassaur charmander squirtle].join(", ") }
let(:context) do
DiscourseAi::Personas::BotContext.new(
@ -47,6 +49,7 @@ RSpec.describe DiscourseAi::Personas::Persona do
time: Time.zone.now,
participants: topic_with_users.allowed_users.map(&:username).join(", "),
resource_url: resource_url,
inferred_concepts: inferred_concepts,
)
end
@ -66,6 +69,7 @@ RSpec.describe DiscourseAi::Personas::Persona do
expect(system_message).to include("joe, jane")
expect(system_message).to include(Time.zone.now.to_s)
expect(system_message).to include(resource_url)
expect(system_message).to include(inferred_concepts)
tools = rendered.tools

View File

@ -0,0 +1,61 @@
# frozen_string_literal: true
RSpec.describe InferredConcept do
  describe "validations" do
    it "requires a name" do
      nameless = described_class.new

      expect(nameless).not_to be_valid
      expect(nameless.errors[:name]).to include("can't be blank")
    end

    it "requires unique names" do
      Fabricate(:inferred_concept, name: "ruby")
      duplicate = described_class.new(name: "ruby")

      expect(duplicate).not_to be_valid
      expect(duplicate.errors[:name]).to include("has already been taken")
    end

    it "is valid with a unique name" do
      expect(Fabricate(:inferred_concept, name: "programming")).to be_valid
    end
  end

  describe "associations" do
    fab!(:topic)
    fab!(:post)
    fab!(:concept) { Fabricate(:inferred_concept, name: "programming") }

    it "can be associated with topics" do
      concept.topics << topic

      # The link must be visible from both sides of the association.
      expect(concept.topics).to include(topic)
      expect(topic.inferred_concepts).to include(concept)
    end

    it "can be associated with posts" do
      concept.posts << post

      # The link must be visible from both sides of the association.
      expect(concept.posts).to include(post)
      expect(post.inferred_concepts).to include(concept)
    end

    it "can have multiple topics and posts" do
      other_topic = Fabricate(:topic)
      other_post = Fabricate(:post)

      concept.topics << [topic, other_topic]
      concept.posts << [post, other_post]

      expect(concept.topics.count).to eq(2)
      expect(concept.posts.count).to eq(2)
    end
  end

  describe "database constraints" do
    it "has the expected schema" do
      record = Fabricate(:inferred_concept)

      # Only checks that the attribute readers exist on a persisted record.
      expect(record).to respond_to(:name, :created_at, :updated_at)
    end
  end
end

View File

@ -19,7 +19,7 @@ RSpec.describe DiscourseAi::Admin::AiFeaturesController do
get "/admin/plugins/discourse-ai/ai-features.json"
expect(response.status).to eq(200)
expect(response.parsed_body["ai_features"].count).to eq(4)
expect(response.parsed_body["ai_features"].count).to eq(5)
end
end

View File

@ -29,7 +29,7 @@ RSpec.describe "Admin AI features configuration", type: :system, js: true do
).to eq(I18n.t("discourse_ai.features.summarization.name"))
expect(ai_features_page).to have_configured_feature_items(1)
expect(ai_features_page).to have_unconfigured_feature_items(3)
expect(ai_features_page).to have_unconfigured_feature_items(4)
end
it "lists the persona used for the corresponding AI feature" do