mirror of
https://github.com/discourse/discourse-ai.git
synced 2025-06-25 00:52:14 +00:00
FEATURE: add inferred concepts system (#1330)
* FEATURE: add inferred concepts system This commit adds a new inferred concepts system that: - Creates a model for storing concept labels that can be applied to topics - Provides AI personas for finding new concepts and matching existing ones - Adds jobs for generating concepts from popular topics - Includes a scheduled job that automatically processes engaging topics * FEATURE: Extend inferred concepts to include posts * Adds support for concepts to be inferred from and applied to posts * Replaces daily task with one that handles both topics and posts * Adds database migration for posts_inferred_concepts join table * Updates PersonaContext to include inferred concepts Co-authored-by: Roman Rizzi <rizziromanalejandro@gmail.com> Co-authored-by: Keegan George <kgeorge13@gmail.com>
This commit is contained in:
parent
4ce8973e56
commit
478f31de47
70
app/jobs/regular/generate_inferred_concepts.rb
Normal file
70
app/jobs/regular/generate_inferred_concepts.rb
Normal file
@ -0,0 +1,70 @@
|
||||
# frozen_string_literal: true
|
||||
|
||||
module Jobs
  # Background job that runs the inferred-concepts manager over a list of
  # topics or posts, either generating new concepts or only matching the
  # items against concepts that already exist.
  class GenerateInferredConcepts < ::Jobs::Base
    sidekiq_options queue: "low"

    # Batch size used when the caller passes none (or an unusable one).
    DEFAULT_BATCH_SIZE = 100

    # Process items to generate new concepts
    #
    # @param args [Hash] Contains job arguments
    # @option args [String] :item_type Required - Type of items to process ('topics' or 'posts')
    # @option args [Array<Integer>] :item_ids Required - List of item IDs to process
    # @option args [Integer] :batch_size (100) Number of items to process in each batch;
    #   missing, zero or negative values fall back to the default
    # @option args [Boolean] :match_only (false) Only match against existing concepts without generating new ones
    def execute(args = {})
      return if args[:item_ids].blank? || args[:item_type].blank?

      item_type = args[:item_type]
      if %w[topics posts].exclude?(item_type)
        Rails.logger.error("Invalid item_type for GenerateInferredConcepts: #{item_type}")
        return
      end

      # each_slice raises ArgumentError for sizes below 1, which would abort
      # the whole job, so sanitize the value before using it.
      batch_size = args[:batch_size].to_i
      batch_size = DEFAULT_BATCH_SIZE if batch_size <= 0

      match_only = args[:match_only] || false

      # Process items in smaller batches to avoid memory issues
      Array(args[:item_ids]).each_slice(batch_size) do |batch_item_ids|
        process_batch(batch_item_ids, item_type, match_only)
      end
    end

    private

    # Loads one batch of records and processes each of them. A failure on one
    # item is logged and does not stop the remaining items.
    def process_batch(item_ids, item_type, match_only)
      # item_type was validated against the topics/posts whitelist in execute.
      klass = item_type.singularize.classify.constantize
      manager = DiscourseAi::InferredConcepts::Manager.new

      klass
        .where(id: item_ids)
        .each do |item|
          process_item(item, item_type, match_only, manager)
        rescue => e
          Rails.logger.error(
            "Error generating concepts from #{item_type.singularize} #{item.id}: #{e.message}\n#{e.backtrace&.join("\n")}",
          )
        end
    end

    # Picks the manager entry point for the item type and mode, then calls it.
    def process_item(item, item_type, match_only, manager)
      for_topic = item_type == "topics"

      handler =
        if match_only
          for_topic ? :match_topic_to_concepts : :match_post_to_concepts
        else
          for_topic ? :generate_concepts_from_topic : :generate_concepts_from_post
        end

      manager.public_send(handler, item)
    end
  end
end
|
87
app/jobs/scheduled/generate_concepts_from_popular_items.rb
Normal file
87
app/jobs/scheduled/generate_concepts_from_popular_items.rb
Normal file
@ -0,0 +1,87 @@
|
||||
# frozen_string_literal: true
|
||||
|
||||
module Jobs
  # Daily scheduled job: picks popular topics and posts via the
  # inferred-concepts manager and enqueues concept generation for them.
  # Does nothing unless the inferred_concepts_enabled site setting is on.
  class GenerateConceptsFromPopularItems < ::Jobs::Scheduled
    every 1.day

    def execute(_args)
      return unless SiteSetting.inferred_concepts_enabled

      process_popular_topics
      process_popular_posts
    end

    private

    # Looks up candidate topics using the topic-related site settings and
    # hands them to the shared enqueue step.
    def process_popular_topics
      topics =
        DiscourseAi::InferredConcepts::Manager.new.find_candidate_topics(
          limit: SiteSetting.inferred_concepts_daily_topics_limit || 20,
          min_posts: SiteSetting.inferred_concepts_min_posts || 5,
          min_likes: SiteSetting.inferred_concepts_min_likes || 10,
          min_views: SiteSetting.inferred_concepts_min_views || 100,
          created_after: SiteSetting.inferred_concepts_lookback_days.days.ago,
        )

      enqueue_concept_jobs("topics", topics)
    end

    # Looks up candidate posts (first posts excluded) using the post-related
    # site settings and hands them to the shared enqueue step.
    def process_popular_posts
      posts =
        DiscourseAi::InferredConcepts::Manager.new.find_candidate_posts(
          limit: SiteSetting.inferred_concepts_daily_posts_limit || 30,
          min_likes: SiteSetting.inferred_concepts_post_min_likes || 5,
          exclude_first_posts: true,
          created_after: SiteSetting.inferred_concepts_lookback_days.days.ago,
        )

      enqueue_concept_jobs("posts", posts)
    end

    # Enqueues the generation job for the candidates and, when background
    # matching is enabled, a follow-up match-only job one hour later.
    def enqueue_concept_jobs(item_type, candidates)
      return if candidates.blank?

      ids = candidates.map(&:id)

      Jobs.enqueue(
        :generate_inferred_concepts,
        item_type: item_type,
        item_ids: ids,
        batch_size: 10,
      )

      return unless SiteSetting.inferred_concepts_background_match

      Jobs.enqueue_in(
        1.hour,
        :generate_inferred_concepts,
        item_type: item_type,
        item_ids: ids,
        batch_size: 10,
        match_only: true,
      )
    end
  end
end
|
25
app/models/inferred_concept.rb
Normal file
25
app/models/inferred_concept.rb
Normal file
@ -0,0 +1,25 @@
|
||||
# frozen_string_literal: true
|
||||
|
||||
# A named concept label. It is linked to topics and posts through the
# inferred_concept_topics / inferred_concept_posts join models.
class InferredConcept < ActiveRecord::Base
  # Names are required and must be unique.
  validates :name, presence: true, uniqueness: true

  # Join rows
  has_many :inferred_concept_topics
  has_many :inferred_concept_posts

  # Records reached through the join rows
  has_many :topics, through: :inferred_concept_topics
  has_many :posts, through: :inferred_concept_posts
end
|
||||
|
||||
# == Schema Information
|
||||
#
|
||||
# Table name: inferred_concepts
|
||||
#
|
||||
# id :bigint not null, primary key
|
||||
# name :string not null
|
||||
# created_at :datetime not null
|
||||
# updated_at :datetime not null
|
||||
#
|
||||
# Indexes
|
||||
#
|
||||
# index_inferred_concepts_on_name (name) UNIQUE
|
||||
#
|
25
app/models/inferred_concept_post.rb
Normal file
25
app/models/inferred_concept_post.rb
Normal file
@ -0,0 +1,25 @@
|
||||
# frozen_string_literal: true
|
||||
|
||||
# Join model linking one inferred concept to one post.
class InferredConceptPost < ActiveRecord::Base
  belongs_to :inferred_concept
  belongs_to :post

  # Both sides of the link are required...
  validates :inferred_concept_id, :post_id, presence: true
  # ...and a concept may be linked to a given post only once.
  validates :inferred_concept_id, uniqueness: { scope: :post_id }
end
|
||||
|
||||
# == Schema Information
|
||||
#
|
||||
# Table name: inferred_concept_posts
|
||||
#
|
||||
# inferred_concept_id :bigint
|
||||
# post_id :bigint
|
||||
# created_at :datetime not null
|
||||
# updated_at :datetime not null
|
||||
#
|
||||
# Indexes
|
||||
#
|
||||
# index_inferred_concept_posts_on_inferred_concept_id (inferred_concept_id)
|
||||
# index_inferred_concept_posts_uniqueness (post_id,inferred_concept_id) UNIQUE
|
||||
#
|
25
app/models/inferred_concept_topic.rb
Normal file
25
app/models/inferred_concept_topic.rb
Normal file
@ -0,0 +1,25 @@
|
||||
# frozen_string_literal: true
|
||||
|
||||
# Join model linking one inferred concept to one topic.
class InferredConceptTopic < ActiveRecord::Base
  belongs_to :inferred_concept
  belongs_to :topic

  # Both sides of the link are required...
  validates :inferred_concept_id, :topic_id, presence: true
  # ...and a concept may be linked to a given topic only once.
  validates :inferred_concept_id, uniqueness: { scope: :topic_id }
end
|
||||
|
||||
# == Schema Information
|
||||
#
|
||||
# Table name: inferred_concept_topics
|
||||
#
|
||||
# inferred_concept_id :bigint
|
||||
# topic_id :bigint
|
||||
# created_at :datetime not null
|
||||
# updated_at :datetime not null
|
||||
#
|
||||
# Indexes
|
||||
#
|
||||
# index_inferred_concept_topics_on_inferred_concept_id (inferred_concept_id)
|
||||
# index_inferred_concept_topics_uniqueness (topic_id,inferred_concept_id) UNIQUE
|
||||
#
|
34
app/serializers/ai_inferred_concept_post_serializer.rb
Normal file
34
app/serializers/ai_inferred_concept_post_serializer.rb
Normal file
@ -0,0 +1,34 @@
|
||||
# frozen_string_literal: true
|
||||
|
||||
# Serializes a post-like object together with its inferred concepts.
# NOTE(review): `username`, `uploaded_avatar_id` and `topic_title` are read
# straight from the serialized object — presumably supplied by the query that
# builds it; confirm against the caller.
class AiInferredConceptPostSerializer < ApplicationSerializer
  attributes(
    :id,
    :post_number,
    :topic_id,
    :topic_title,
    :username,
    :avatar_template,
    :created_at,
    :updated_at,
    :excerpt,
    :truncated,
    :inferred_concepts,
  )

  # Avatar template built from the object's username and uploaded avatar id.
  def avatar_template
    User.avatar_template(object.username, object.uploaded_avatar_id)
  end

  # Excerpt generated from the cooked post body.
  def excerpt
    Post.excerpt(object.cooked)
  end

  # True when the cooked body is longer than the post_excerpt_maxlength setting.
  def truncated
    SiteSetting.post_excerpt_maxlength < object.cooked.length
  end

  # The object's concepts, each rendered with InferredConceptSerializer.
  def inferred_concepts
    concepts = object.inferred_concepts
    ActiveModel::ArraySerializer.new(concepts, each_serializer: InferredConceptSerializer)
  end
end
|
5
app/serializers/inferred_concept_serializer.rb
Normal file
5
app/serializers/inferred_concept_serializer.rb
Normal file
@ -0,0 +1,5 @@
|
||||
# frozen_string_literal: true
|
||||
|
||||
# Exposes the id, name and timestamps of an inferred concept.
class InferredConceptSerializer < ApplicationSerializer
  attributes(:id, :name, :created_at, :updated_at)
end
|
@ -22,10 +22,20 @@ export default class AiPersonaResponseFormatEditor extends Component {
|
||||
type: "string",
|
||||
},
|
||||
type: {
|
||||
type: "string",
|
||||
enum: ["string", "integer", "boolean", "array"],
|
||||
},
|
||||
array_type: {
|
||||
type: "string",
|
||||
enum: ["string", "integer", "boolean"],
|
||||
options: {
|
||||
dependencies: {
|
||||
type: "array",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
required: ["key", "type"],
|
||||
},
|
||||
};
|
||||
|
||||
@ -41,7 +51,11 @@ export default class AiPersonaResponseFormatEditor extends Component {
|
||||
const toDisplay = {};
|
||||
|
||||
this.args.data.response_format.forEach((keyDesc) => {
|
||||
toDisplay[keyDesc.key] = keyDesc.type;
|
||||
if (keyDesc.type === "array") {
|
||||
toDisplay[keyDesc.key] = `[${keyDesc.array_type}]`;
|
||||
} else {
|
||||
toDisplay[keyDesc.key] = keyDesc.type;
|
||||
}
|
||||
});
|
||||
|
||||
return prettyJSON(toDisplay);
|
||||
|
@ -330,6 +330,15 @@ en:
|
||||
short_summarizer:
|
||||
name: "Summarizer (short form)"
|
||||
description: "Default persona used to power AI short summaries for topic lists' items"
|
||||
concept_finder:
|
||||
name: "Concept Finder"
|
||||
description: "AI Bot specialized in identifying concepts and themes in content"
|
||||
concept_matcher:
|
||||
name: "Concept Matcher"
|
||||
description: "AI Bot specialized in matching content against existing concepts"
|
||||
concept_deduplicator:
|
||||
name: "Concept Deduplicator"
|
||||
description: "AI Bot specialized in deduplicating concepts"
|
||||
topic_not_found: "Summary unavailable, topic not found!"
|
||||
summarizing: "Summarizing topic"
|
||||
searching: "Searching for: '%{query}'"
|
||||
@ -549,6 +558,9 @@ en:
|
||||
discord_search:
|
||||
name: "Discord Search"
|
||||
description: "Adds the ability to search Discord channels"
|
||||
inferred_concepts:
|
||||
name: "Inferred Concepts"
|
||||
description: "Classifies topics and posts into areas of interest / labels."
|
||||
|
||||
errors:
|
||||
quota_exceeded: "You have exceeded the quota for this model. Please try again in %{relative_time}."
|
||||
|
@ -417,3 +417,55 @@ discourse_ai:
|
||||
default: false
|
||||
client: false
|
||||
hidden: true
|
||||
|
||||
inferred_concepts_enabled:
|
||||
default: false
|
||||
client: true
|
||||
area: "ai-features/inferred_concepts"
|
||||
inferred_concepts_background_match:
|
||||
default: false
|
||||
client: false
|
||||
area: "ai-features/inferred_concepts"
|
||||
inferred_concepts_daily_topics_limit:
|
||||
default: 20
|
||||
client: false
|
||||
area: "ai-features/inferred_concepts"
|
||||
inferred_concepts_min_posts:
|
||||
default: 5
|
||||
client: false
|
||||
area: "ai-features/inferred_concepts"
|
||||
inferred_concepts_min_likes:
|
||||
default: 10
|
||||
client: false
|
||||
area: "ai-features/inferred_concepts"
|
||||
inferred_concepts_min_views:
|
||||
default: 100
|
||||
client: false
|
||||
area: "ai-features/inferred_concepts"
|
||||
inferred_concepts_lookback_days:
|
||||
default: 30
|
||||
client: false
|
||||
area: "ai-features/inferred_concepts"
|
||||
inferred_concepts_daily_posts_limit:
|
||||
default: 30
|
||||
client: false
|
||||
area: "ai-features/inferred_concepts"
|
||||
inferred_concepts_post_min_likes:
|
||||
default: 5
|
||||
client: false
|
||||
area: "ai-features/inferred_concepts"
|
||||
inferred_concepts_generate_persona:
|
||||
default: "-15"
|
||||
type: enum
|
||||
enum: "DiscourseAi::Configuration::PersonaEnumerator"
|
||||
area: "ai-features/inferred_concepts"
|
||||
inferred_concepts_match_persona:
|
||||
default: "-16"
|
||||
type: enum
|
||||
enum: "DiscourseAi::Configuration::PersonaEnumerator"
|
||||
area: "ai-features/inferred_concepts"
|
||||
inferred_concepts_deduplicate_persona:
|
||||
default: "-17"
|
||||
type: enum
|
||||
enum: "DiscourseAi::Configuration::PersonaEnumerator"
|
||||
area: "ai-features/inferred_concepts"
|
||||
|
@ -72,9 +72,13 @@ DiscourseAi::Personas::Persona.system_personas.each do |persona_class, id|
|
||||
|
||||
persona.tools = tools.map { |name, value| [name, value] }
|
||||
|
||||
persona.response_format = instance.response_format
|
||||
# Only set response_format if it's not defined as a method in the persona class
|
||||
if !instance.class.instance_methods.include?(:response_format)
|
||||
persona.response_format = instance.response_format
|
||||
end
|
||||
|
||||
persona.examples = instance.examples
|
||||
# Only set examples if it's not defined as a method in the persona class
|
||||
persona.examples = instance.examples if !instance.class.instance_methods.include?(:examples)
|
||||
|
||||
persona.system_prompt = instance.system_prompt
|
||||
persona.top_p = instance.top_p
|
||||
|
11
db/migrate/20250508182047_create_inferred_concepts_table.rb
Normal file
11
db/migrate/20250508182047_create_inferred_concepts_table.rb
Normal file
@ -0,0 +1,11 @@
|
||||
# frozen_string_literal: true
|
||||
# Creates the inferred_concepts table with a required, uniquely indexed name.
class CreateInferredConceptsTable < ActiveRecord::Migration[7.2]
  def change
    create_table :inferred_concepts do |t|
      t.string :name, null: false
      t.timestamps

      # Declared inside the table block; the default index name is unchanged.
      t.index :name, unique: true
    end
  end
end
|
18
db/migrate/20250508183456_create_inferred_concept_topics.rb
Normal file
18
db/migrate/20250508183456_create_inferred_concept_topics.rb
Normal file
@ -0,0 +1,18 @@
|
||||
# frozen_string_literal: true
|
||||
|
||||
# Creates the id-less join table between inferred concepts and topics.
class CreateInferredConceptTopics < ActiveRecord::Migration[7.0]
  def change
    create_table :inferred_concept_topics, id: false do |t|
      t.bigint :inferred_concept_id
      t.bigint :topic_id
      t.timestamps

      # One row per (topic, concept) pair.
      t.index %i[topic_id inferred_concept_id],
              unique: true,
              name: "index_inferred_concept_topics_uniqueness"

      # Lookup of join rows by concept.
      t.index :inferred_concept_id
    end
  end
end
|
18
db/migrate/20250509000001_create_inferred_concept_posts.rb
Normal file
18
db/migrate/20250509000001_create_inferred_concept_posts.rb
Normal file
@ -0,0 +1,18 @@
|
||||
# frozen_string_literal: true
|
||||
|
||||
# Creates the id-less join table between inferred concepts and posts.
class CreateInferredConceptPosts < ActiveRecord::Migration[7.0]
  def change
    create_table :inferred_concept_posts, id: false do |t|
      t.bigint :inferred_concept_id
      t.bigint :post_id
      t.timestamps

      # One row per (post, concept) pair.
      t.index %i[post_id inferred_concept_id],
              unique: true,
              name: "index_inferred_concept_posts_uniqueness"

      # Lookup of join rows by concept.
      t.index :inferred_concept_id
    end
  end
end
|
@ -9,6 +9,7 @@ module DiscourseAi
|
||||
@stream_consumer = stream_consumer
|
||||
@current_key = nil
|
||||
@current_value = nil
|
||||
@tracking_array = false
|
||||
@parser = DiscourseAi::Completions::JsonStreamingParser.new
|
||||
|
||||
@parser.key do |k|
|
||||
@ -16,12 +17,28 @@ module DiscourseAi
|
||||
@current_value = nil
|
||||
end
|
||||
|
||||
@parser.value do |v|
|
||||
@parser.value do |value|
|
||||
if @current_key
|
||||
stream_consumer.notify_progress(@current_key, v)
|
||||
@current_key = nil
|
||||
if @tracking_array
|
||||
@current_value << value
|
||||
stream_consumer.notify_progress(@current_key, @current_value)
|
||||
else
|
||||
stream_consumer.notify_progress(@current_key, value)
|
||||
@current_key = nil
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
@parser.start_array do
|
||||
@tracking_array = true
|
||||
@current_value = []
|
||||
end
|
||||
|
||||
@parser.end_array do
|
||||
@tracking_array = false
|
||||
@current_key = nil
|
||||
@current_value = nil
|
||||
end
|
||||
end
|
||||
|
||||
def broken?
|
||||
@ -46,8 +63,9 @@ module DiscourseAi
|
||||
end
|
||||
|
||||
if @parser.state == :start_string && @current_key
|
||||
buffered = @tracking_array ? [@parser.buf] : @parser.buf
|
||||
# this is worth notifying
|
||||
stream_consumer.notify_progress(@current_key, @parser.buf)
|
||||
stream_consumer.notify_progress(@current_key, buffered)
|
||||
end
|
||||
|
||||
@current_key = nil if @parser.state == :end_value
|
||||
|
@ -45,7 +45,7 @@ module DiscourseAi
|
||||
@property_cursors[prop_name] = @tracked[prop_name].length
|
||||
unread
|
||||
else
|
||||
# Ints and bools are always returned as is.
|
||||
# Ints and bools, and arrays are always returned as is.
|
||||
@tracked[prop_name]
|
||||
end
|
||||
end
|
||||
|
@ -7,7 +7,7 @@ module DiscourseAi
|
||||
@persona =
|
||||
AiPersona
|
||||
.all_personas(enabled_only: false)
|
||||
.find { |persona| persona.id == SiteSetting.ai_discord_search_persona.to_i }
|
||||
.find { |p| p.id == SiteSetting.ai_discord_search_persona.to_i }
|
||||
.new
|
||||
@bot =
|
||||
DiscourseAi::Personas::Bot.as(
|
||||
|
@ -36,6 +36,14 @@ module DiscourseAi
|
||||
persona_setting_name: "ai_discord_search_persona",
|
||||
enable_setting_name: "ai_discord_search_enabled",
|
||||
},
|
||||
{
|
||||
id: 5,
|
||||
name_ref: "inferred_concepts",
|
||||
name_key: "discourse_ai.features.inferred_concepts.name",
|
||||
description_key: "discourse_ai.features.inferred_concepts.description",
|
||||
persona_setting_name: "inferred_concepts_generate_persona",
|
||||
enable_setting_name: "inferred_concepts_enabled",
|
||||
},
|
||||
]
|
||||
end
|
||||
|
||||
|
135
lib/inferred_concepts/applier.rb
Normal file
135
lib/inferred_concepts/applier.rb
Normal file
@ -0,0 +1,135 @@
|
||||
# frozen_string_literal: true
|
||||
|
||||
module DiscourseAi
  module InferredConcepts
    # Attaches inferred concepts to topics and posts, builds the text that is
    # sent for analysis, and asks the configured matcher persona which of the
    # existing concepts fit that text.
    class Applier
      # Associates the provided concepts with a topic.
      #
      # Concepts that are already attached (and duplicates inside +concepts+)
      # are skipped, so applying the same concept twice — e.g. a match pass
      # after a generation pass — does not violate the unique join index.
      #
      # @param topic [Topic] a Topic instance
      # @param concepts [Enumerable<InferredConcept>] concepts to attach
      # @return the topic's inferred_concepts collection, or nil when either
      #   argument is blank
      def apply_to_topic(topic, concepts)
        return if topic.blank? || concepts.blank?

        attach_missing_concepts(topic, concepts)
      end

      # Associates the provided concepts with a post.
      # Same skipping rules as #apply_to_topic.
      #
      # @param post [Post] a Post instance
      # @param concepts [Enumerable<InferredConcept>] concepts to attach
      # @return the post's inferred_concepts collection, or nil when either
      #   argument is blank
      def apply_to_post(post, concepts)
        return if post.blank? || concepts.blank?

        attach_missing_concepts(post, concepts)
      end

      # Extracts content from a topic for concept analysis.
      #
      # @param topic [Topic]
      # @return [String] the topic title followed by its first 10 posts
      #   (by post number); "" when the topic is blank
      def topic_content_for_analysis(topic)
        return "" if topic.blank?

        # Users are preloaded to avoid one query per post; a post may have no
        # user, hence the safe navigation.
        posts = Post.where(topic_id: topic.id).includes(:user).order(:post_number).limit(10)

        body =
          posts
            .map { |post| "#{post.post_number}) #{post.user&.username}: #{post.raw}" }
            .join("\n\n")

        "Title: #{topic.title}\n\n#{body}"
      end

      # Extracts content from a post for concept analysis.
      #
      # @param post [Post]
      # @return [String] the topic title (for context) and the post's raw
      #   text; "" when the post is blank
      def post_content_for_analysis(post)
        return "" if post.blank?

        # Both the topic and the user may be missing; nil interpolates as "".
        "Topic: #{post.topic&.title}\n\nPost by #{post.user&.username}:\n#{post.raw}"
      end

      # Matches a topic with existing concepts and attaches the matches.
      #
      # @param topic [Topic]
      # @return the matched InferredConcept records ([] when the topic is
      #   blank or no concepts exist yet)
      def match_existing_concepts(topic)
        return [] if topic.blank?

        matched_concepts = matching_concepts_for(topic_content_for_analysis(topic))
        apply_to_topic(topic, matched_concepts)

        matched_concepts
      end

      # Matches a post with existing concepts and attaches the matches.
      #
      # @param post [Post]
      # @return the matched InferredConcept records ([] when the post is
      #   blank or no concepts exist yet)
      def match_existing_concepts_for_post(post)
        return [] if post.blank?

        matched_concepts = matching_concepts_for(post_content_for_analysis(post))
        apply_to_post(post, matched_concepts)

        matched_concepts
      end

      # Uses the persona selected by the inferred_concepts_match_persona site
      # setting to match content against the provided concepts.
      #
      # @param content [String] text to analyze
      # @param concept_list [Array<String>] names of the candidate concepts
      # @return the :matching_concepts property of the structured output, or
      #   [] when inputs are blank, the persona cannot be found, or no
      #   structured output was produced
      def match_concepts_to_content(content, concept_list)
        return [] if content.blank? || concept_list.blank?

        persona_id = SiteSetting.inferred_concepts_match_persona.to_i
        persona_class = AiPersona.all_personas(enabled_only: false).find { |p| p.id == persona_id }

        if persona_class.nil?
          Rails.logger.warn("Inferred concepts match persona not found (id: #{persona_id})")
          return []
        end

        persona = persona_class.new
        llm = LlmModel.find(persona.class.default_llm_id)

        context =
          DiscourseAi::Personas::BotContext.new(
            messages: [{ type: :user, content: content }],
            user: Discourse.system_user,
            inferred_concepts: concept_list,
          )

        bot = DiscourseAi::Personas::Bot.as(Discourse.system_user, persona: persona, model: llm)
        structured_output = nil

        # Keep the last structured-output partial the bot yields.
        bot.reply(context) do |partial, _, type|
          structured_output = partial if type == :structured_output
        end

        structured_output&.read_buffered_property(:matching_concepts) || []
      end

      private

      # Adds to record.inferred_concepts only the concepts that are not
      # attached yet; returns the collection.
      def attach_missing_concepts(record, concepts)
        association = record.inferred_concepts
        attached_ids = association.map(&:id)

        missing = concepts.to_a.uniq.reject { |concept| attached_ids.include?(concept.id) }

        association << missing
      end

      # Looks up the stored concepts whose names the matcher persona returned
      # for the given content; [] when no concepts exist yet.
      def matching_concepts_for(content)
        existing_concepts = DiscourseAi::InferredConcepts::Manager.new.list_concepts
        return [] if existing_concepts.empty?

        matched_names = match_concepts_to_content(content, existing_concepts)
        InferredConcept.where(name: matched_names)
      end
    end
  end
end
|
176
lib/inferred_concepts/finder.rb
Normal file
176
lib/inferred_concepts/finder.rb
Normal file
@ -0,0 +1,176 @@
|
||||
# frozen_string_literal: true
|
||||
|
||||
module DiscourseAi
  module InferredConcepts
    # Finds concepts in content via the configured personas, persists concept
    # names, and selects the topics/posts that concept generation should run on.
    class Finder
      # Identifies potential concepts from provided content using the persona
      # selected by the inferred_concepts_generate_persona site setting.
      #
      # @param content [String] text to analyze
      # @return the :concepts property of the structured output, or [] when
      #   content is blank, the persona is missing, or nothing was produced
      def identify_concepts(content)
        return [] if content.blank?

        structured_property_from_persona(
          SiteSetting.inferred_concepts_generate_persona,
          :concepts,
          messages: [{ type: :user, content: content }],
          inferred_concepts: DiscourseAi::InferredConcepts::Manager.new.list_concepts,
        )
      end

      # Creates or finds concepts in the database from provided names.
      # Names are stripped; empty names and repeated names are skipped so each
      # concept is looked up once and no nameless record is built.
      #
      # @param concept_names [Array<String>]
      # @return [Array<InferredConcept>]
      def create_or_find_concepts(concept_names)
        return [] if concept_names.blank?

        concept_names
          .map { |name| name.to_s.strip }
          .reject(&:empty?)
          .uniq
          .map { |name| InferredConcept.find_or_create_by(name: name) }
      end

      # Finds candidate topics to use for concept generation
      #
      # @param limit [Integer] Maximum number of topics to return
      # @param min_posts [Integer] Minimum number of posts in topic
      # @param min_likes [Integer] Minimum number of likes across all posts
      # @param min_views [Integer] Minimum number of views
      # @param exclude_topic_ids [Array<Integer>] Topic IDs to exclude
      # @param category_ids [Array<Integer>] Only include topics from these categories (optional)
      # @param created_after [DateTime] Only include topics created after this time (optional)
      # @return a Topic query (not a loaded Array), ordered by engagement
      #   score and limited to +limit+ rows
      def find_candidate_topics(
        limit: 100,
        min_posts: 5,
        min_likes: 10,
        min_views: 100,
        exclude_topic_ids: [],
        category_ids: nil,
        created_after: 30.days.ago
      )
        query =
          Topic.where(
            "topics.posts_count >= ? AND topics.views >= ? AND topics.like_count >= ?",
            min_posts,
            min_views,
            min_likes,
          )

        # Apply additional filters
        query = query.where("topics.id NOT IN (?)", exclude_topic_ids) if exclude_topic_ids.present?
        query = query.where("topics.category_id IN (?)", category_ids) if category_ids.present?
        query = query.where("topics.created_at >= ?", created_after) if created_after.present?

        # Only topics of the default archetype
        query = query.where(archetype: Archetype.default)

        # Exclude topics that already have concepts. The join column is
        # nullable, and a single NULL inside a NOT IN list would filter out
        # every topic, so NULLs are removed from the subquery.
        topics_with_concepts = <<~SQL
          SELECT DISTINCT topic_id
          FROM inferred_concept_topics
          WHERE topic_id IS NOT NULL
        SQL

        query = query.where("topics.id NOT IN (#{topics_with_concepts})")

        # Score and order topics by engagement (combination of views, likes, and posts)
        query
          .select(
            "topics.*, (topics.like_count * 2 + topics.posts_count * 3 + topics.views * 0.1) AS engagement_score",
          )
          .order("engagement_score DESC")
          .limit(limit)
      end

      # Find candidate posts that are good for concept generation
      #
      # @param limit [Integer] Maximum number of posts to return
      # @param min_likes [Integer] Minimum number of likes
      # @param exclude_first_posts [Boolean] Exclude first posts in topics
      # @param exclude_post_ids [Array<Integer>] Post IDs to exclude
      # @param category_ids [Array<Integer>] Only include posts from topics in these categories
      # @param created_after [DateTime] Only include posts created after this time
      # @return a Post query (not a loaded Array), ordered by like count
      #   and limited to +limit+ rows
      def find_candidate_posts(
        limit: 100,
        min_likes: 5,
        exclude_first_posts: true,
        exclude_post_ids: [],
        category_ids: nil,
        created_after: 30.days.ago
      )
        query = Post.where("posts.like_count >= ?", min_likes)

        # Exclude first posts if specified
        query = query.where("posts.post_number > 1") if exclude_first_posts

        # Apply additional filters
        query = query.where("posts.id NOT IN (?)", exclude_post_ids) if exclude_post_ids.present?
        query = query.where("posts.created_at >= ?", created_after) if created_after.present?

        # Filter by category if specified
        if category_ids.present?
          query = query.joins(:topic).where("topics.category_id IN (?)", category_ids)
        end

        # Exclude posts that already have concepts; NULLs are removed from the
        # subquery for the same reason as in find_candidate_topics.
        posts_with_concepts = <<~SQL
          SELECT DISTINCT post_id
          FROM inferred_concept_posts
          WHERE post_id IS NOT NULL
        SQL

        query = query.where("posts.id NOT IN (#{posts_with_concepts})")

        # Order by engagement (likes) and cap the result
        query.order(like_count: :desc).limit(limit)
      end

      # Deduplicate and standardize a list of concepts using the persona
      # selected by the inferred_concepts_deduplicate_persona site setting.
      #
      # Always returns an array-like value so callers can concat the result;
      # blank input yields [].
      #
      # @param concept_names [Array<String>] List of concept names to deduplicate
      # @return the :streamlined_tags property of the structured output, or []
      def deduplicate_concepts(concept_names)
        return [] if concept_names.blank?

        structured_property_from_persona(
          SiteSetting.inferred_concepts_deduplicate_persona,
          :streamlined_tags,
          messages: [{ type: :user, content: concept_names.join(", ") }],
        )
      end

      private

      # Runs the persona with the given id as the system user and returns the
      # requested property of the last structured output it yields.
      # Returns [] when the persona cannot be found or nothing was produced.
      def structured_property_from_persona(persona_id, property, messages:, **context_options)
        persona_class =
          AiPersona.all_personas(enabled_only: false).find { |p| p.id == persona_id.to_i }

        if persona_class.nil?
          Rails.logger.warn("Inferred concepts persona not found (id: #{persona_id})")
          return []
        end

        persona = persona_class.new

        # NOTE(review): Applier reads default_llm_id from persona.class; here
        # it is read from the instance as before — confirm which is intended.
        llm = LlmModel.find(persona.default_llm_id)

        context =
          DiscourseAi::Personas::BotContext.new(
            messages: messages,
            user: Discourse.system_user,
            **context_options,
          )

        bot = DiscourseAi::Personas::Bot.as(Discourse.system_user, persona: persona, model: llm)
        structured_output = nil

        bot.reply(context) do |partial, _, type|
          structured_output = partial if type == :structured_output
        end

        structured_output&.read_buffered_property(property) || []
      end
    end
  end
end
|
201
lib/inferred_concepts/manager.rb
Normal file
201
lib/inferred_concepts/manager.rb
Normal file
@ -0,0 +1,201 @@
|
||||
# frozen_string_literal: true
|
||||
|
||||
module DiscourseAi
|
||||
module InferredConcepts
|
||||
class Manager
|
||||
# Lists the names of the stored concepts in ascending name order.
# @param limit [Integer, nil] Optional maximum number of names to return
# @return [Array<String>] concept names (not InferredConcept records)
def list_concepts(limit: nil)
  scope = InferredConcept.order(name: :asc)
  scope = scope.limit(limit) if limit.present?
  scope.pluck(:name)
end
|
||||
|
||||
# Deduplicate concepts in batches by letter
|
||||
# This method will:
|
||||
# 1. Group concepts by first letter
|
||||
# 2. Process each letter group separately through the deduplicator
|
||||
# 3. Do a final pass with all deduplicated concepts
|
||||
# @return [Hash] Statistics about the deduplication process
|
||||
def deduplicate_concepts_by_letter(per_letter_batch: 50, full_pass_batch: 150)
|
||||
# Get all concepts
|
||||
all_concepts = list_concepts
|
||||
return if all_concepts.empty?
|
||||
|
||||
letter_groups = Hash.new { |h, k| h[k] = [] }
|
||||
|
||||
# Group concepts by first letter
|
||||
all_concepts.each do |concept|
|
||||
first_char = concept[0]&.upcase
|
||||
|
||||
if first_char && first_char.match?(/[A-Z]/)
|
||||
letter_groups[first_char] << concept
|
||||
else
|
||||
# Non-alphabetic or empty concepts go in a special group
|
||||
letter_groups["#"] << concept
|
||||
end
|
||||
end
|
||||
|
||||
# Process each letter group
|
||||
letter_deduplicated_concepts = []
|
||||
finder = DiscourseAi::InferredConcepts::Finder.new
|
||||
|
||||
letter_groups.each do |letter, concepts|
|
||||
next if concepts.empty?
|
||||
|
||||
batches = concepts.each_slice(per_letter_batch).to_a
|
||||
|
||||
batches.each do |batch|
|
||||
result = finder.deduplicate_concepts(batch)
|
||||
letter_deduplicated_concepts.concat(result)
|
||||
end
|
||||
end
|
||||
|
||||
# Final pass with all deduplicated concepts
|
||||
if letter_deduplicated_concepts.present?
|
||||
final_result = []
|
||||
|
||||
batches = letter_deduplicated_concepts.each_slice(full_pass_batch).to_a
|
||||
batches.each do |batch|
|
||||
dedups = finder.deduplicate_concepts(batch)
|
||||
final_result.concat(dedups)
|
||||
end
|
||||
|
||||
# Remove duplicates
|
||||
final_result.uniq!
|
||||
|
||||
# Apply the deduplicated concepts
|
||||
InferredConcept.where.not(name: final_result).destroy_all
|
||||
InferredConcept.insert_all(final_result.map { |concept| { name: concept } })
|
||||
end
|
||||
end
|
||||
|
||||
# Extract new concepts from arbitrary content
|
||||
# @param content [String] The content to analyze
|
||||
# @return [Array<String>] The identified concept names
|
||||
def identify_concepts(content)
|
||||
DiscourseAi::InferredConcepts::Finder.new.identify_concepts(content)
|
||||
end
|
||||
|
||||
# Identify and create concepts from content without applying them to any topic
|
||||
# @param content [String] The content to analyze
|
||||
# @return [Array<InferredConcept>] The created or found concepts
|
||||
def generate_concepts_from_content(content)
|
||||
return [] if content.blank?
|
||||
|
||||
# Identify concepts
|
||||
finder = DiscourseAi::InferredConcepts::Finder.new
|
||||
concept_names = finder.identify_concepts(content)
|
||||
return [] if concept_names.blank?
|
||||
|
||||
# Create or find concepts in the database
|
||||
finder.create_or_find_concepts(concept_names)
|
||||
end
|
||||
|
||||
# Generate concepts from a topic's content without applying them to the topic
|
||||
# @param topic [Topic] A Topic instance
|
||||
# @return [Array<InferredConcept>] The created or found concepts
|
||||
def generate_concepts_from_topic(topic)
|
||||
return [] if topic.blank?
|
||||
|
||||
# Get content to analyze
|
||||
applier = DiscourseAi::InferredConcepts::Applier.new
|
||||
content = applier.topic_content_for_analysis(topic)
|
||||
return [] if content.blank?
|
||||
|
||||
# Generate concepts from the content
|
||||
generate_concepts_from_content(content)
|
||||
end
|
||||
|
||||
# Generate concepts from a post's content without applying them to the post
|
||||
# @param post [Post] A Post instance
|
||||
# @return [Array<InferredConcept>] The created or found concepts
|
||||
def generate_concepts_from_post(post)
|
||||
return [] if post.blank?
|
||||
|
||||
# Get content to analyze
|
||||
applier = DiscourseAi::InferredConcepts::Applier.new
|
||||
content = applier.post_content_for_analysis(post)
|
||||
return [] if content.blank?
|
||||
|
||||
# Generate concepts from the content
|
||||
generate_concepts_from_content(content)
|
||||
end
|
||||
|
||||
# Match a topic against existing concepts
|
||||
# @param topic [Topic] A Topic instance
|
||||
# @return [Array<InferredConcept>] The concepts that were applied
|
||||
def match_topic_to_concepts(topic)
|
||||
return [] if topic.blank?
|
||||
|
||||
DiscourseAi::InferredConcepts::Applier.new.match_existing_concepts(topic)
|
||||
end
|
||||
|
||||
# Match a post against existing concepts
|
||||
# @param post [Post] A Post instance
|
||||
# @return [Array<InferredConcept>] The concepts that were applied
|
||||
def match_post_to_concepts(post)
|
||||
return [] if post.blank?
|
||||
|
||||
DiscourseAi::InferredConcepts::Applier.new.match_existing_concepts_for_post(post)
|
||||
end
|
||||
|
||||
# Find topics that have a specific concept
|
||||
# @param concept_name [String] The name of the concept to search for
|
||||
# @return [Array<Topic>] Topics that have the specified concept
|
||||
def search_topics_by_concept(concept_name)
|
||||
concept = ::InferredConcept.find_by(name: concept_name)
|
||||
return [] unless concept
|
||||
concept.topics
|
||||
end
|
||||
|
||||
# Find posts that have a specific concept
|
||||
# @param concept_name [String] The name of the concept to search for
|
||||
# @return [Array<Post>] Posts that have the specified concept
|
||||
def search_posts_by_concept(concept_name)
|
||||
concept = ::InferredConcept.find_by(name: concept_name)
|
||||
return [] unless concept
|
||||
concept.posts
|
||||
end
|
||||
|
||||
# Match arbitrary content against existing concepts
|
||||
# @param content [String] The content to analyze
|
||||
# @return [Array<String>] Names of matching concepts
|
||||
def match_content_to_concepts(content)
|
||||
existing_concepts = InferredConcept.all.pluck(:name)
|
||||
return [] if existing_concepts.empty?
|
||||
|
||||
DiscourseAi::InferredConcepts::Applier.new.match_concepts_to_content(
|
||||
content,
|
||||
existing_concepts,
|
||||
)
|
||||
end
|
||||
|
||||
# Find candidate topics that are good for concept generation
|
||||
#
|
||||
# @param opts [Hash] Options to pass to the finder
|
||||
# @option opts [Integer] :limit (100) Maximum number of topics to return
|
||||
# @option opts [Integer] :min_posts (5) Minimum number of posts in topic
|
||||
# @option opts [Integer] :min_likes (10) Minimum number of likes across all posts
|
||||
# @option opts [Integer] :min_views (100) Minimum number of views
|
||||
# @option opts [Array<Integer>] :exclude_topic_ids ([]) Topic IDs to exclude
|
||||
# @option opts [Array<Integer>] :category_ids (nil) Only include topics from these categories
|
||||
# @option opts [DateTime] :created_after (30.days.ago) Only include topics created after this time
|
||||
# @return [Array<Topic>] Array of Topic objects that are good candidates
|
||||
def find_candidate_topics(opts = {})
|
||||
DiscourseAi::InferredConcepts::Finder.new.find_candidate_topics(**opts)
|
||||
end
|
||||
|
||||
# Find candidate posts that are good for concept generation
|
||||
# @param opts [Hash] Options to pass to the finder
|
||||
# @return [Array<Post>] Array of Post objects that are good candidates
|
||||
def find_candidate_posts(opts = {})
|
||||
DiscourseAi::InferredConcepts::Finder.new.find_candidate_posts(**opts)
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
@ -152,10 +152,12 @@ module DiscourseAi
|
||||
raw_context << partial
|
||||
current_thinking << partial
|
||||
end
|
||||
elsif partial.is_a?(DiscourseAi::Completions::StructuredOutput)
|
||||
update_blk.call(partial, nil, :structured_output)
|
||||
else
|
||||
update_blk.call(partial)
|
||||
elsif update_blk.present?
|
||||
if partial.is_a?(DiscourseAi::Completions::StructuredOutput)
|
||||
update_blk.call(partial, nil, :structured_output)
|
||||
else
|
||||
update_blk.call(partial)
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
@ -316,7 +318,13 @@ module DiscourseAi
|
||||
response_format
|
||||
.to_a
|
||||
.reduce({}) do |memo, format|
|
||||
memo[format["key"].to_sym] = { type: format["type"] }
|
||||
type_desc = { type: format["type"] }
|
||||
|
||||
if format["type"] == "array"
|
||||
type_desc[:items] = { type: format["array_type"] || "string" }
|
||||
end
|
||||
|
||||
memo[format["key"].to_sym] = type_desc
|
||||
memo
|
||||
end
|
||||
|
||||
|
@ -17,7 +17,8 @@ module DiscourseAi
|
||||
:context_post_ids,
|
||||
:feature_name,
|
||||
:resource_url,
|
||||
:cancel_manager
|
||||
:cancel_manager,
|
||||
:inferred_concepts
|
||||
|
||||
def initialize(
|
||||
post: nil,
|
||||
@ -35,7 +36,8 @@ module DiscourseAi
|
||||
context_post_ids: nil,
|
||||
feature_name: "bot",
|
||||
resource_url: nil,
|
||||
cancel_manager: nil
|
||||
cancel_manager: nil,
|
||||
inferred_concepts: []
|
||||
)
|
||||
@participants = participants
|
||||
@user = user
|
||||
@ -54,7 +56,7 @@ module DiscourseAi
|
||||
@resource_url = resource_url
|
||||
|
||||
@feature_name = feature_name
|
||||
@resource_url = resource_url
|
||||
@inferred_concepts = inferred_concepts
|
||||
|
||||
@cancel_manager = cancel_manager
|
||||
|
||||
@ -68,7 +70,15 @@ module DiscourseAi
|
||||
end
|
||||
|
||||
# these are strings that can be safely interpolated into templates
|
||||
TEMPLATE_PARAMS = %w[time site_url site_title site_description participants resource_url]
|
||||
TEMPLATE_PARAMS = %w[
|
||||
time
|
||||
site_url
|
||||
site_title
|
||||
site_description
|
||||
participants
|
||||
resource_url
|
||||
inferred_concepts
|
||||
]
|
||||
|
||||
def lookup_template_param(key)
|
||||
public_send(key.to_sym) if TEMPLATE_PARAMS.include?(key)
|
||||
@ -114,6 +124,7 @@ module DiscourseAi
|
||||
skip_tool_details: @skip_tool_details,
|
||||
feature_name: @feature_name,
|
||||
resource_url: @resource_url,
|
||||
inferred_concepts: @inferred_concepts,
|
||||
}
|
||||
end
|
||||
end
|
||||
|
53
lib/personas/concept_deduplicator.rb
Normal file
53
lib/personas/concept_deduplicator.rb
Normal file
@ -0,0 +1,53 @@
|
||||
# frozen_string_literal: true
|
||||
|
||||
module DiscourseAi
  module Personas
    # Persona that asks the model to collapse a list of machine-generated tags
    # into a shorter list, returned under the "streamlined_tags" key.
    class ConceptDeduplicator < Persona
      class << self
        # This persona is not enabled by default.
        def default_enabled
          false
        end
      end

      # @return [String] the static instructions, with surrounding whitespace stripped
      def system_prompt
        instructions = <<~PROMPT
          You will be given a list of machine-generated tags.
          Your task is to streamline this list by merging entries who are similar or related.

          Please follow these steps to create a streamlined list of tags:

          1. Review the entire list of tags carefully.
          2. Identify and remove any exact duplicates.
          3. Look for tags that are too specific or niche, and consider removing them or replacing them with more general terms.
          4. If there are multiple tags that convey similar concepts, choose the best one and remove the others, or add a new one that covers the missing aspect.
          5. Ensure that the remaining tags are relevant and useful for describing the content.

          When deciding which tags are "best", consider the following criteria:
          - Relevance: How well does the tag describe the core content or theme?
          - Generality: Is the tag specific enough to be useful, but not so specific that it's unlikely to be searched for?
          - Clarity: Is the tag easy to understand and free from ambiguity?
          - Popularity: Would this tag likely be used by people searching for this type of content?

          Example Input:
          AI Bias, AI Bots, AI Ethics, AI Helper, AI Integration, AI Moderation, AI Search, AI-Driven Moderation, AI-Generated Post Illustrations, AJAX Events, AJAX Requests, AMA Events, API, API Access, API Authentication, API Automation, API Call, API Changes, API Compliance, API Configuration, API Costs, API Documentation, API Endpoint, API Endpoints, API Functions, API Integration, API Key, API Keys, API Limitation, API Limitations, API Permissions, API Rate Limiting, API Request, API Request Optimization, API Requests, API Security, API Suspension, API Token, API Tokens, API Translation, API Versioning, API configuration, API endpoint, API key, APIs, APK, APT Package Manager, ARIA, ARIA Tags, ARM Architecture, ARM-based, AWS, AWS Lightsail, AWS RDS, AWS S3, AWS Translate, AWS costs, AWS t2.micro, Abbreviation Expansion, Abbreviations

          Example Output:
          AI, AJAX, API, APK, APT Package Manager, ARIA, ARM Architecture, AWS, Abbreviations

          Please provide your streamlined list of tags within <streamlined_tags> key.

          Remember, the goal is to create a more focused and effective set of tags while maintaining the essence of the original list.

          Your output should be in the following format:
          <o>
          {
          "streamlined_tags": ["tag1", "tag3"]
          }
          </o>
        PROMPT

        instructions.strip
      end

      # Declares a single structured-output property: "streamlined_tags", an
      # array whose items are strings.
      def response_format
        streamlined_tags = {
          "key" => "streamlined_tags",
          "type" => "array",
          "array_type" => "string",
        }

        [streamlined_tags]
      end
    end
  end
end
|
49
lib/personas/concept_finder.rb
Normal file
49
lib/personas/concept_finder.rb
Normal file
@ -0,0 +1,49 @@
|
||||
# frozen_string_literal: true
|
||||
|
||||
module DiscourseAi
  module Personas
    # Persona that extracts concept labels from text and returns them under
    # the "concepts" key.
    class ConceptFinder < Persona
      class << self
        # This persona is not enabled by default.
        def default_enabled
          false
        end
      end

      # Build the instructions. When concepts already exist, up to 100 of their
      # names (as returned by Manager#list_concepts) are embedded so the model
      # can reuse them.
      #
      # @return [String] the prompt, with surrounding whitespace stripped
      def system_prompt
        known_concepts = DiscourseAi::InferredConcepts::Manager.new.list_concepts(limit: 100)

        existing_concepts_text =
          if known_concepts.present?
            <<~CONCEPTS
              The following concepts already exist in the system:
              #{known_concepts.join(", ")}

              You can reuse these existing concepts if they apply to the content, or suggest new concepts.
            CONCEPTS
          else
            ""
          end

        instructions = <<~PROMPT
          You are an advanced concept tagging system that identifies key concepts, themes, and topics from provided text.
          Your job is to extract meaningful labels that can be used to categorize content.

          Guidelines for generating concepts:
          - Extract up to 7 concepts from the provided content
          - Concepts should be single words or short phrases (1-3 words maximum)
          - Focus on substantive topics, themes, technologies, methodologies, or domains
          - Avoid overly general terms like "discussion" or "question"
          - Ensure concepts are relevant to the core content
          - Do not include proper nouns unless they represent key technologies or methodologies
          - Maintain the original language of the text being analyzed
          #{existing_concepts_text}
          Format your response as a JSON object with a single key named "concepts", which has an array of concept strings as the value.
          Your output should be in the following format:
          <o>
          {"concepts": ["concept1", "concept2", "concept3"]}
          </o>

          Where the concepts are replaced by the actual concepts you've identified.
        PROMPT

        instructions.strip
      end

      # Declares a single structured-output property: "concepts", an array
      # whose items are strings.
      def response_format
        concepts = { "key" => "concepts", "type" => "array", "array_type" => "string" }

        [concepts]
      end
    end
  end
end
|
43
lib/personas/concept_matcher.rb
Normal file
43
lib/personas/concept_matcher.rb
Normal file
@ -0,0 +1,43 @@
|
||||
# frozen_string_literal: true
|
||||
|
||||
module DiscourseAi
  module Personas
    # Persona that picks, from a supplied list of concepts, the ones relevant
    # to a piece of content and returns them under the "matching_concepts" key.
    class ConceptMatcher < Persona
      class << self
        # This persona is not enabled by default.
        def default_enabled
          false
        end
      end

      # The prompt contains the literal placeholder {inferred_concepts}; it is
      # not interpolated here and is left for the caller's template handling.
      #
      # @return [String] the prompt, with surrounding whitespace stripped
      def system_prompt
        instructions = <<~PROMPT
          You are an advanced concept matching system that determines which concepts from a provided list are relevant to a piece of content.
          Your job is to analyze the content and determine which concepts from the list apply to it.

          Guidelines for matching concepts:
          - Only select concepts that are clearly relevant to the content
          - The content must substantially discuss or relate to the concept
          - Superficial mentions are not enough to consider a concept relevant
          - Be precise and selective - don't match concepts that are only tangentially related
          - Consider both explicit mentions and implicit discussions of concepts
          - Maintain the original language of the text being analyzed
          - IMPORTANT: Only select from the exact concepts in the provided list - do not add new concepts
          - If no concepts from the list match the content, return an empty array

          The list of available concepts is:
          {inferred_concepts}

          Format your response as a JSON object with a single key named "matching_concepts", which has an array of concept strings from the provided list.
          Your output should be in the following format:
          <o>
          {"matching_concepts": ["concept1", "concept3", "concept5"]}
          </o>

          Only include concepts from the provided list that match the content. If no concepts match, return an empty array.
        PROMPT

        instructions.strip
      end

      # Declares a single structured-output property: "matching_concepts", an
      # array whose items are strings.
      def response_format
        matching_concepts = {
          "key" => "matching_concepts",
          "type" => "array",
          "array_type" => "string",
        }

        [matching_concepts]
      end
    end
  end
end
|
@ -52,6 +52,9 @@ module DiscourseAi
|
||||
ShortSummarizer => -12,
|
||||
Designer => -13,
|
||||
ForumResearcher => -14,
|
||||
ConceptFinder => -15,
|
||||
ConceptMatcher => -16,
|
||||
ConceptDeduplicator => -17,
|
||||
}
|
||||
end
|
||||
|
||||
|
@ -11,6 +11,9 @@ module DiscourseAi
|
||||
-> { where(classification_type: "sentiment") },
|
||||
class_name: "ClassificationResult",
|
||||
as: :target
|
||||
|
||||
has_many :inferred_concept_posts
|
||||
has_many :inferred_concepts, through: :inferred_concept_posts
|
||||
end
|
||||
end
|
||||
end
|
||||
|
@ -11,6 +11,9 @@ module DiscourseAi
|
||||
-> { where(summary_type: AiSummary.summary_types[:gist]) },
|
||||
class_name: "AiSummary",
|
||||
as: :target
|
||||
|
||||
has_many :inferred_concept_topics
|
||||
has_many :inferred_concepts, through: :inferred_concept_topics
|
||||
end
|
||||
end
|
||||
end
|
||||
|
2
spec/fabricators/inferred_concept_fabricator.rb
Normal file
2
spec/fabricators/inferred_concept_fabricator.rb
Normal file
@ -0,0 +1,2 @@
|
||||
# frozen_string_literal: true
|
||||
# Builds InferredConcept records with sequential names ("concept_<n>").
Fabricator(:inferred_concept) do
  name do
    sequence(:name) { |index| "concept_#{index}" }
  end
end
|
167
spec/jobs/regular/generate_inferred_concepts_spec.rb
Normal file
167
spec/jobs/regular/generate_inferred_concepts_spec.rb
Normal file
@ -0,0 +1,167 @@
|
||||
# frozen_string_literal: true
|
||||
|
||||
RSpec.describe Jobs::GenerateInferredConcepts do
  subject(:job) { described_class.new }

  fab!(:topic)
  fab!(:post)
  fab!(:concept) { Fabricate(:inferred_concept, name: "programming") }

  # Class whose instances the job is expected to call into
  let(:manager_class) { DiscourseAi::InferredConcepts::Manager }

  before { SiteSetting.inferred_concepts_enabled = true }

  describe "#execute" do
    it "does nothing with blank item_ids" do
      expect_any_instance_of(manager_class).not_to receive(:match_topic_to_concepts)

      [[], nil].each { |ids| job.execute(item_type: "topics", item_ids: ids) }
    end

    it "does nothing with blank item_type" do
      expect_any_instance_of(manager_class).not_to receive(:match_topic_to_concepts)

      ["", nil].each { |type| job.execute(item_type: type, item_ids: [topic.id]) }
    end

    it "validates item_type to be topics or posts" do
      allow(Rails.logger).to receive(:error).with(/Invalid item_type/)

      job.execute(item_type: "invalid", item_ids: [1])
    end

    context "with topics" do
      it "processes topics in match_only mode" do
        expect_any_instance_of(manager_class).to receive(:match_topic_to_concepts).with(topic)

        job.execute(item_type: "topics", item_ids: [topic.id], match_only: true)
      end

      it "processes topics in generation mode" do
        expect_any_instance_of(manager_class).to receive(:generate_concepts_from_topic).with(topic)

        job.execute(item_type: "topics", item_ids: [topic.id], match_only: false)
      end

      it "handles topics that don't exist" do
        # Unknown IDs are expected to be skipped without raising
        expect_any_instance_of(manager_class).not_to receive(:match_topic_to_concepts)

        missing_id = 999_999
        job.execute(item_type: "topics", item_ids: [missing_id], match_only: true)
      end

      it "processes multiple topics" do
        topic2 = Fabricate(:topic)

        manager_instance = instance_double(manager_class)
        allow(manager_class).to receive(:new).and_return(manager_instance)

        [topic, topic2].each do |record|
          allow(manager_instance).to receive(:match_topic_to_concepts).with(record)
        end

        job.execute(item_type: "topics", item_ids: [topic.id, topic2.id], match_only: true)
      end

      it "processes topics in batches" do
        topic_ids = Array.new(5) { Fabricate(:topic).id }

        # Five ids with batch_size 3 => one lookup of three ids and one of two
        topic_ids.each_slice(3) do |slice|
          allow(Topic).to receive(:where).with(id: slice).and_call_original
        end

        job.execute(item_type: "topics", item_ids: topic_ids, batch_size: 3, match_only: true)
      end
    end

    context "with posts" do
      it "processes posts in match_only mode" do
        expect_any_instance_of(manager_class).to receive(:match_post_to_concepts).with(post)

        job.execute(item_type: "posts", item_ids: [post.id], match_only: true)
      end

      it "processes posts in generation mode" do
        expect_any_instance_of(manager_class).to receive(:generate_concepts_from_post).with(post)

        job.execute(item_type: "posts", item_ids: [post.id], match_only: false)
      end

      it "handles posts that don't exist" do
        # Unknown IDs are expected to be skipped without raising
        expect_any_instance_of(manager_class).not_to receive(:match_post_to_concepts)

        missing_id = 999_999
        job.execute(item_type: "posts", item_ids: [missing_id], match_only: true)
      end

      it "processes multiple posts" do
        post2 = Fabricate(:post)

        manager_instance = instance_double(manager_class)
        allow(manager_class).to receive(:new).and_return(manager_instance)

        [post, post2].each do |record|
          allow(manager_instance).to receive(:match_post_to_concepts).with(record)
        end

        job.execute(item_type: "posts", item_ids: [post.id, post2.id], match_only: true)
      end
    end

    it "handles exceptions during processing" do
      allow_any_instance_of(manager_class).to receive(:match_topic_to_concepts).and_raise(
        StandardError.new("Test error"),
      )

      allow(Rails.logger).to receive(:error).with(
        /Error generating concepts from topic #{topic.id}/,
      )

      job.execute(item_type: "topics", item_ids: [topic.id], match_only: true)
    end

    it "uses default batch size of 100" do
      topic_ids = Array.new(150) { Fabricate(:topic).id }

      # 150 ids with the default batch size => lookups of 100 and 50 ids
      topic_ids.each_slice(100) do |slice|
        allow(Topic).to receive(:where).with(id: slice).and_call_original
      end

      job.execute(item_type: "topics", item_ids: topic_ids, match_only: true)
    end

    it "respects custom batch size" do
      topic_ids = Array.new(5) { Fabricate(:topic).id }

      # Five ids with batch_size 2 => lookups of 2, 2 and 1 ids
      topic_ids.each_slice(2) do |slice|
        allow(Topic).to receive(:where).with(id: slice).and_call_original
      end

      job.execute(item_type: "topics", item_ids: topic_ids, batch_size: 2, match_only: true)
    end
  end
end
|
259
spec/jobs/scheduled/generate_concepts_from_popular_items_spec.rb
Normal file
259
spec/jobs/scheduled/generate_concepts_from_popular_items_spec.rb
Normal file
@ -0,0 +1,259 @@
|
||||
# frozen_string_literal: true
|
||||
|
||||
RSpec.describe Jobs::GenerateConceptsFromPopularItems do
  subject(:job) { described_class.new }

  fab!(:topic) { Fabricate(:topic, posts_count: 6, views: 150, like_count: 12) }
  fab!(:post) { Fabricate(:post, like_count: 8, post_number: 2) }

  # Class whose instances the job is expected to call into
  let(:manager_class) { DiscourseAi::InferredConcepts::Manager }

  before do
    SiteSetting.inferred_concepts_enabled = true
    SiteSetting.inferred_concepts_daily_topics_limit = 20
    SiteSetting.inferred_concepts_daily_posts_limit = 30
    SiteSetting.inferred_concepts_min_posts = 5
    SiteSetting.inferred_concepts_min_likes = 10
    SiteSetting.inferred_concepts_min_views = 100
    SiteSetting.inferred_concepts_post_min_likes = 5
    SiteSetting.inferred_concepts_lookback_days = 30
    SiteSetting.inferred_concepts_background_match = false
  end

  # Keyword arguments the job passes along with :generate_inferred_concepts.
  def job_args(item_type, ids, **extra)
    { item_type: item_type, item_ids: ids, batch_size: 10 }.merge(extra)
  end

  describe "#execute" do
    it "does nothing when inferred_concepts_enabled is false" do
      SiteSetting.inferred_concepts_enabled = false

      expect_any_instance_of(manager_class).not_to receive(:find_candidate_topics)
      expect_any_instance_of(manager_class).not_to receive(:find_candidate_posts)
      allow(Jobs).to receive(:enqueue)

      job.execute({})
    end

    it "processes popular topics when enabled" do
      freeze_time do
        expect_any_instance_of(manager_class).to receive(:find_candidate_topics).with(
          limit: 20,
          min_posts: 5,
          min_likes: 10,
          min_views: 100,
          created_after: 30.days.ago,
        ).and_return([topic])

        allow(Jobs).to receive(:enqueue).with(
          :generate_inferred_concepts,
          **job_args("topics", [topic.id]),
        )

        expect_any_instance_of(manager_class).to receive(:find_candidate_posts).and_return([])

        job.execute({})
      end
    end

    it "processes popular posts when enabled" do
      freeze_time do
        expect_any_instance_of(manager_class).to receive(:find_candidate_topics).and_return([])

        expect_any_instance_of(manager_class).to receive(:find_candidate_posts).with(
          limit: 30,
          min_likes: 5,
          exclude_first_posts: true,
          created_after: 30.days.ago,
        ).and_return([post])

        allow(Jobs).to receive(:enqueue).with(
          :generate_inferred_concepts,
          **job_args("posts", [post.id]),
        )

        job.execute({})
      end
    end

    it "schedules background matching jobs when enabled" do
      SiteSetting.inferred_concepts_background_match = true

      expect_any_instance_of(manager_class).to receive(:find_candidate_topics).and_return([topic])
      expect_any_instance_of(manager_class).to receive(:find_candidate_posts).and_return([post])

      candidates = { "topics" => [topic.id], "posts" => [post.id] }

      # Generation jobs, enqueued immediately
      candidates.each do |item_type, ids|
        allow(Jobs).to receive(:enqueue).with(
          :generate_inferred_concepts,
          **job_args(item_type, ids),
        )
      end

      # Background matching jobs, delayed by an hour
      candidates.each do |item_type, ids|
        allow(Jobs).to receive(:enqueue_in).with(
          1.hour,
          :generate_inferred_concepts,
          **job_args(item_type, ids, match_only: true),
        )
      end

      job.execute({})
    end

    it "does not schedule jobs when no candidates found" do
      expect_any_instance_of(manager_class).to receive(:find_candidate_topics).and_return([])
      expect_any_instance_of(manager_class).to receive(:find_candidate_posts).and_return([])

      allow(Jobs).to receive(:enqueue)
      allow(Jobs).to receive(:enqueue_in)

      job.execute({})
    end

    it "uses site setting values for topic filtering" do
      SiteSetting.inferred_concepts_daily_topics_limit = 50
      SiteSetting.inferred_concepts_min_posts = 8
      SiteSetting.inferred_concepts_min_likes = 15
      SiteSetting.inferred_concepts_min_views = 200
      SiteSetting.inferred_concepts_lookback_days = 45

      freeze_time do
        expect_any_instance_of(manager_class).to receive(:find_candidate_topics).with(
          limit: 50,
          min_posts: 8,
          min_likes: 15,
          min_views: 200,
          created_after: 45.days.ago,
        ).and_return([])

        expect_any_instance_of(manager_class).to receive(:find_candidate_posts).and_return([])

        job.execute({})
      end
    end

    it "uses site setting values for post filtering" do
      SiteSetting.inferred_concepts_daily_posts_limit = 40
      SiteSetting.inferred_concepts_post_min_likes = 8
      SiteSetting.inferred_concepts_lookback_days = 45

      freeze_time do
        expect_any_instance_of(manager_class).to receive(:find_candidate_topics).and_return([])

        expect_any_instance_of(manager_class).to receive(:find_candidate_posts).with(
          limit: 40,
          min_likes: 8,
          exclude_first_posts: true,
          created_after: 45.days.ago,
        ).and_return([])

        job.execute({})
      end
    end

    it "handles nil site setting values gracefully" do
      SiteSetting.inferred_concepts_daily_topics_limit = nil
      SiteSetting.inferred_concepts_daily_posts_limit = nil
      SiteSetting.inferred_concepts_min_posts = nil
      SiteSetting.inferred_concepts_min_likes = nil
      SiteSetting.inferred_concepts_min_views = nil
      SiteSetting.inferred_concepts_post_min_likes = nil
      # lookback_days keeps the value from the before block so .days.ago still works

      freeze_time do
        # Every nil setting is expected to reach the manager as 0
        expect_any_instance_of(manager_class).to receive(:find_candidate_topics).with(
          limit: 0,
          min_posts: 0,
          min_likes: 0,
          min_views: 0,
          created_after: 30.days.ago,
        ).and_return([])

        expect_any_instance_of(manager_class).to receive(:find_candidate_posts).with(
          limit: 0,
          min_likes: 0,
          exclude_first_posts: true,
          created_after: 30.days.ago,
        ).and_return([])

        job.execute({})
      end
    end

    it "processes both topics and posts in the same run" do
      expect_any_instance_of(manager_class).to receive(:find_candidate_topics).and_return([topic])
      expect_any_instance_of(manager_class).to receive(:find_candidate_posts).and_return([post])

      allow(Jobs).to receive(:enqueue).twice

      job.execute({})
    end
  end

  context "when scheduling the job" do
    it "is scheduled to run daily" do
      expect(described_class.every).to eq(1.day)
    end
  end
end
|
@ -672,5 +672,87 @@ RSpec.describe DiscourseAi::Completions::Endpoints::AwsBedrock do
|
||||
expect(structured_output.read_buffered_property(:key)).to eq("Hello!\n There")
|
||||
end
|
||||
end
|
||||
|
||||
it "works with JSON schema array types" do
|
||||
schema = {
|
||||
type: "json_schema",
|
||||
json_schema: {
|
||||
name: "reply",
|
||||
schema: {
|
||||
type: "object",
|
||||
properties: {
|
||||
plain: {
|
||||
type: "string",
|
||||
},
|
||||
key: {
|
||||
type: "array",
|
||||
items: {
|
||||
type: "string",
|
||||
},
|
||||
},
|
||||
},
|
||||
required: %w[plain key],
|
||||
additionalProperties: false,
|
||||
},
|
||||
strict: true,
|
||||
},
|
||||
}
|
||||
|
||||
messages =
|
||||
[
|
||||
{ type: "message_start", message: { usage: { input_tokens: 9 } } },
|
||||
{ type: "content_block_delta", delta: { text: "\"" } },
|
||||
{ type: "content_block_delta", delta: { text: "key" } },
|
||||
{ type: "content_block_delta", delta: { text: "\":" } },
|
||||
{ type: "content_block_delta", delta: { text: " [\"" } },
|
||||
{ type: "content_block_delta", delta: { text: "Hello!" } },
|
||||
{ type: "content_block_delta", delta: { text: " I am" } },
|
||||
{ type: "content_block_delta", delta: { text: " a " } },
|
||||
{ type: "content_block_delta", delta: { text: "chunk\"," } },
|
||||
{ type: "content_block_delta", delta: { text: "\"There" } },
|
||||
{ type: "content_block_delta", delta: { text: "\"]," } },
|
||||
{ type: "content_block_delta", delta: { text: " \"plain" } },
|
||||
{ type: "content_block_delta", delta: { text: "\":\"" } },
|
||||
{ type: "content_block_delta", delta: { text: "I'm here" } },
|
||||
{ type: "content_block_delta", delta: { text: " too\"}" } },
|
||||
{ type: "message_delta", delta: { usage: { output_tokens: 25 } } },
|
||||
].map { |message| encode_message(message) }
|
||||
|
||||
proxy = DiscourseAi::Completions::Llm.proxy("custom:#{model.id}")
|
||||
request = nil
|
||||
bedrock_mock.with_chunk_array_support do
|
||||
stub_request(
|
||||
:post,
|
||||
"https://bedrock-runtime.us-east-1.amazonaws.com/model/anthropic.claude-3-sonnet-20240229-v1:0/invoke-with-response-stream",
|
||||
)
|
||||
.with do |inner_request|
|
||||
request = inner_request
|
||||
true
|
||||
end
|
||||
.to_return(status: 200, body: messages)
|
||||
|
||||
structured_output = nil
|
||||
proxy.generate("hello world", response_format: schema, user: user) do |partial|
|
||||
structured_output = partial
|
||||
end
|
||||
|
||||
expected = {
|
||||
"max_tokens" => 4096,
|
||||
"anthropic_version" => "bedrock-2023-05-31",
|
||||
"messages" => [
|
||||
{ "role" => "user", "content" => "hello world" },
|
||||
{ "role" => "assistant", "content" => "{" },
|
||||
],
|
||||
"system" => "You are a helpful bot",
|
||||
}
|
||||
expect(JSON.parse(request.body)).to eq(expected)
|
||||
|
||||
expect(structured_output.read_buffered_property(:key)).to contain_exactly(
|
||||
"Hello! I am a chunk",
|
||||
"There",
|
||||
)
|
||||
expect(structured_output.read_buffered_property(:plain)).to eq("I'm here too")
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
@ -16,6 +16,12 @@ RSpec.describe DiscourseAi::Completions::StructuredOutput do
|
||||
status: {
|
||||
type: "string",
|
||||
},
|
||||
list: {
|
||||
type: "array",
|
||||
items: {
|
||||
type: "string",
|
||||
},
|
||||
},
|
||||
},
|
||||
)
|
||||
end
|
||||
@ -64,6 +70,48 @@ RSpec.describe DiscourseAi::Completions::StructuredOutput do
|
||||
# No partial string left to read.
|
||||
expect(structured_output.read_buffered_property(:status)).to eq("")
|
||||
end
|
||||
|
||||
it "supports array types" do
  # Streamed JSON fragments; unary plus yields unfrozen strings so the
  # parser is free to mutate what it is given.
  chunks = [
    +"{ \"",
    +"list",
    +"\":",
    +" [\"",
    +"Hello!",
    +" I am",
    +" a ",
    +"chunk\",",
    +"\"There\"",
    +"]}",
  ]
  push = ->(fragments) { fragments.each { |fragment| structured_output << fragment } }
  buffered_list = -> { structured_output.read_buffered_property(:list) }
  complete_list = ["Hello! I am a chunk", "There"]

  # Only the key has arrived: the array has not been opened yet.
  push.call(chunks[0..2])
  expect(buffered_list.call).to be_nil

  # Array opened, together with the first (still empty) string element.
  push.call(chunks[3..3])
  expect(buffered_list.call).to eq([""])

  push.call(chunks[4..4])
  expect(buffered_list.call).to eq(["Hello!"])

  # The rest of the first element arrives across several fragments.
  push.call(chunks[5..7])
  expect(buffered_list.call).to eq(["Hello! I am a chunk"])

  push.call(chunks[8..8])
  expect(buffered_list.call).to eq(complete_list)

  # Closing the array and the object does not change the buffered value.
  push.call(chunks[9..9])
  expect(buffered_list.call).to eq(complete_list)
end
|
||||
end
|
||||
|
||||
describe "dealing with non-JSON responses" do
|
||||
|
320
spec/lib/inferred_concepts/applier_spec.rb
Normal file
320
spec/lib/inferred_concepts/applier_spec.rb
Normal file
@ -0,0 +1,320 @@
|
||||
# frozen_string_literal: true
|
||||
|
||||
RSpec.describe DiscourseAi::InferredConcepts::Applier do
|
||||
subject(:applier) { described_class.new }
|
||||
|
||||
fab!(:topic) { Fabricate(:topic, title: "Ruby Programming Tutorial") }
|
||||
fab!(:post) { Fabricate(:post, raw: "This post is about advanced testing techniques") }
|
||||
fab!(:user) { Fabricate(:user, username: "dev_user") }
|
||||
fab!(:concept1) { Fabricate(:inferred_concept, name: "programming") }
|
||||
fab!(:concept2) { Fabricate(:inferred_concept, name: "testing") }
|
||||
fab!(:llm_model) { Fabricate(:fake_model) }
|
||||
|
||||
before do
|
||||
SiteSetting.inferred_concepts_match_persona = -1
|
||||
SiteSetting.inferred_concepts_enabled = true
|
||||
|
||||
# Set up the post's user
|
||||
post.update!(user: user)
|
||||
end
|
||||
|
||||
describe "#apply_to_topic" do
  it "does nothing for blank topic or concepts" do
    # [target, concepts] pairs where one side is blank.
    blank_inputs = [[nil, [concept1]], [topic, []], [topic, nil]]

    blank_inputs.each do |target, concepts|
      expect { applier.apply_to_topic(target, concepts) }.not_to raise_error
    end
  end

  it "associates concepts with topic" do
    applied = [concept1, concept2]

    applier.apply_to_topic(topic, applied)

    # The association must be visible from both sides.
    expect(topic.inferred_concepts).to include(*applied)
    applied.each { |concept| expect(concept.topics).to include(topic) }
  end
end
|
||||
|
||||
describe "#apply_to_post" do
  it "does nothing for blank post or concepts" do
    # [target, concepts] pairs where one side is blank.
    blank_inputs = [[nil, [concept1]], [post, []], [post, nil]]

    blank_inputs.each do |target, concepts|
      expect { applier.apply_to_post(target, concepts) }.not_to raise_error
    end
  end

  it "associates concepts with post" do
    applied = [concept1, concept2]

    applier.apply_to_post(post, applied)

    # The association must be visible from both sides.
    expect(post.inferred_concepts).to include(*applied)
    applied.each { |concept| expect(concept.posts).to include(post) }
  end
end
|
||||
|
||||
describe "#topic_content_for_analysis" do
|
||||
it "returns empty string for blank topic" do
|
||||
expect(applier.topic_content_for_analysis(nil)).to eq("")
|
||||
end
|
||||
|
||||
it "extracts title and posts content" do
|
||||
# Create additional posts for the topic
|
||||
post1 = Fabricate(:post, topic: topic, post_number: 1, raw: "First post content", user: user)
|
||||
post2 = Fabricate(:post, topic: topic, post_number: 2, raw: "Second post content", user: user)
|
||||
|
||||
content = applier.topic_content_for_analysis(topic)
|
||||
|
||||
expect(content).to include(topic.title)
|
||||
expect(content).to include("First post content")
|
||||
expect(content).to include("Second post content")
|
||||
expect(content).to include(user.username)
|
||||
expect(content).to include("1)")
|
||||
expect(content).to include("2)")
|
||||
end
|
||||
|
||||
it "limits to first 10 posts" do
  # Give each of the 12 posts a distinct, zero-padded marker so that no
  # marker is a substring of another ("01" vs "11").
  markers = (1..12).map { |number| format("Numbered post content %02d", number) }
  markers.each_with_index do |marker, index|
    Fabricate(:post, topic: topic, post_number: index + 1, raw: marker, user: user)
  end

  content = applier.topic_content_for_analysis(topic)

  # Assert on the observable output rather than on which ActiveRecord
  # methods were called: only the first ten posts may appear.
  expect(content).to include(*markers.first(10))
  expect(content).not_to include(*markers.last(2))
end
|
||||
end
|
||||
|
||||
describe "#post_content_for_analysis" do
|
||||
it "returns empty string for blank post" do
|
||||
expect(applier.post_content_for_analysis(nil)).to eq("")
|
||||
end
|
||||
|
||||
it "extracts post content with topic context" do
|
||||
content = applier.post_content_for_analysis(post)
|
||||
|
||||
expect(content).to include(post.topic.title)
|
||||
expect(content).to include(post.raw)
|
||||
expect(content).to include(post.user.username)
|
||||
expect(content).to include("Topic:")
|
||||
expect(content).to include("Post by")
|
||||
end
|
||||
|
||||
it "handles post without topic" do
|
||||
# Mock the post to return nil for topic
|
||||
allow(post).to receive(:topic).and_return(nil)
|
||||
|
||||
content = applier.post_content_for_analysis(post)
|
||||
|
||||
expect(content).to include(post.raw)
|
||||
expect(content).to include(post.user.username)
|
||||
expect(content).to include("Topic: ")
|
||||
end
|
||||
end
|
||||
|
||||
describe "#match_existing_concepts" do
|
||||
let(:manager) { instance_double(DiscourseAi::InferredConcepts::Manager) }
|
||||
|
||||
before do
|
||||
allow(DiscourseAi::InferredConcepts::Manager).to receive(:new).and_return(manager)
|
||||
allow(manager).to receive(:list_concepts).and_return(%w[programming testing ruby])
|
||||
end
|
||||
|
||||
it "returns empty array for blank topic" do
|
||||
expect(applier.match_existing_concepts(nil)).to eq([])
|
||||
end
|
||||
|
||||
it "returns empty array when no existing concepts" do
|
||||
allow(manager).to receive(:list_concepts).and_return([])
|
||||
|
||||
result = applier.match_existing_concepts(topic)
|
||||
expect(result).to eq([])
|
||||
end
|
||||
|
||||
it "matches concepts and applies them to topic" do
|
||||
# Test the real implementation without stubbing internal methods
|
||||
allow(InferredConcept).to receive(:where).with(name: ["programming"]).and_return([concept1])
|
||||
|
||||
# Mock the LLM interaction
|
||||
persona_instance_double = instance_spy("DiscourseAi::Personas::Persona")
|
||||
bot_double = instance_spy(DiscourseAi::Personas::Bot)
|
||||
structured_output_double = instance_double("DiscourseAi::Completions::StructuredOutput")
|
||||
persona_class_double = double("PersonaClass") # rubocop:disable RSpec/VerifiedDoubles
|
||||
|
||||
allow(AiPersona).to receive(:all_personas).and_return([persona_class_double])
|
||||
allow(persona_class_double).to receive(:id).and_return(SiteSetting.inferred_concepts_match_persona.to_i)
|
||||
allow(persona_class_double).to receive(:new).and_return(persona_instance_double)
|
||||
allow(persona_class_double).to receive(:default_llm_id).and_return(llm_model.id)
|
||||
allow(persona_instance_double).to receive(:class).and_return(persona_class_double)
|
||||
allow(LlmModel).to receive(:find).and_return(llm_model)
|
||||
allow(DiscourseAi::Personas::Bot).to receive(:as).and_return(bot_double)
|
||||
allow(bot_double).to receive(:reply).and_yield(
|
||||
structured_output_double,
|
||||
nil,
|
||||
:structured_output,
|
||||
)
|
||||
allow(structured_output_double).to receive(:read_buffered_property).with(
|
||||
:matching_concepts,
|
||||
).and_return(["programming"])
|
||||
|
||||
result = applier.match_existing_concepts(topic)
|
||||
expect(result).to eq([concept1])
|
||||
end
|
||||
end
|
||||
|
||||
describe "#match_existing_concepts_for_post" do
|
||||
let(:manager) { instance_double(DiscourseAi::InferredConcepts::Manager) }
|
||||
|
||||
before do
|
||||
allow(DiscourseAi::InferredConcepts::Manager).to receive(:new).and_return(manager)
|
||||
allow(manager).to receive(:list_concepts).and_return(%w[programming testing ruby])
|
||||
end
|
||||
|
||||
it "returns empty array for blank post" do
|
||||
expect(applier.match_existing_concepts_for_post(nil)).to eq([])
|
||||
end
|
||||
|
||||
it "returns empty array when no existing concepts" do
|
||||
allow(manager).to receive(:list_concepts).and_return([])
|
||||
|
||||
result = applier.match_existing_concepts_for_post(post)
|
||||
expect(result).to eq([])
|
||||
end
|
||||
|
||||
it "matches concepts and applies them to post" do
|
||||
# Test the real implementation without stubbing internal methods
|
||||
allow(InferredConcept).to receive(:where).with(name: ["testing"]).and_return([concept2])
|
||||
|
||||
# Mock the LLM interaction
|
||||
persona_instance_double = instance_spy("DiscourseAi::Personas::Persona")
|
||||
bot_double = instance_spy(DiscourseAi::Personas::Bot)
|
||||
structured_output_double = instance_double("DiscourseAi::Completions::StructuredOutput")
|
||||
persona_class_double = double("PersonaClass") # rubocop:disable RSpec/VerifiedDoubles
|
||||
|
||||
allow(AiPersona).to receive(:all_personas).and_return([persona_class_double])
|
||||
allow(persona_class_double).to receive(:id).and_return(SiteSetting.inferred_concepts_match_persona.to_i)
|
||||
allow(persona_class_double).to receive(:new).and_return(persona_instance_double)
|
||||
allow(persona_class_double).to receive(:default_llm_id).and_return(llm_model.id)
|
||||
allow(persona_instance_double).to receive(:class).and_return(persona_class_double)
|
||||
allow(LlmModel).to receive(:find).and_return(llm_model)
|
||||
allow(DiscourseAi::Personas::Bot).to receive(:as).and_return(bot_double)
|
||||
allow(bot_double).to receive(:reply).and_yield(
|
||||
structured_output_double,
|
||||
nil,
|
||||
:structured_output,
|
||||
)
|
||||
allow(structured_output_double).to receive(:read_buffered_property).with(
|
||||
:matching_concepts,
|
||||
).and_return(["testing"])
|
||||
|
||||
result = applier.match_existing_concepts_for_post(post)
|
||||
expect(result).to eq([concept2])
|
||||
end
|
||||
end
|
||||
|
||||
describe "#match_concepts_to_content" do
|
||||
it "returns empty array for blank content or concept list" do
|
||||
expect(applier.match_concepts_to_content("", ["concept1"])).to eq([])
|
||||
expect(applier.match_concepts_to_content(nil, ["concept1"])).to eq([])
|
||||
expect(applier.match_concepts_to_content("content", [])).to eq([])
|
||||
expect(applier.match_concepts_to_content("content", nil)).to eq([])
|
||||
end
|
||||
|
||||
it "uses ConceptMatcher persona to match concepts" do
|
||||
content = "This is about Ruby programming"
|
||||
concept_list = %w[programming testing ruby]
|
||||
structured_output_double = instance_double("DiscourseAi::Completions::StructuredOutput")
|
||||
|
||||
persona_class_double = double("PersonaClass") # rubocop:disable RSpec/VerifiedDoubles
|
||||
persona_instance_double = instance_spy("DiscourseAi::Personas::Persona")
|
||||
bot_double = instance_spy(DiscourseAi::Personas::Bot)
|
||||
|
||||
allow(AiPersona).to receive(:all_personas).and_return([persona_class_double])
|
||||
allow(persona_class_double).to receive(:id).and_return(SiteSetting.inferred_concepts_match_persona.to_i)
|
||||
allow(persona_class_double).to receive(:new).and_return(persona_instance_double)
|
||||
allow(persona_class_double).to receive(:default_llm_id).and_return(llm_model.id)
|
||||
allow(persona_instance_double).to receive(:class).and_return(persona_class_double)
|
||||
allow(LlmModel).to receive(:find).and_return(llm_model)
|
||||
allow(DiscourseAi::Personas::Bot).to receive(:as).and_return(bot_double)
|
||||
allow(bot_double).to receive(:reply).and_yield(
|
||||
structured_output_double,
|
||||
nil,
|
||||
:structured_output,
|
||||
)
|
||||
allow(structured_output_double).to receive(:read_buffered_property).with(
|
||||
:matching_concepts,
|
||||
).and_return(%w[programming ruby])
|
||||
|
||||
result = applier.match_concepts_to_content(content, concept_list)
|
||||
expect(result).to eq(%w[programming ruby])
|
||||
|
||||
expect(bot_double).to have_received(:reply)
|
||||
expect(structured_output_double).to have_received(:read_buffered_property).with(
|
||||
:matching_concepts,
|
||||
)
|
||||
end
|
||||
|
||||
it "handles no structured output gracefully" do
|
||||
content = "Test content"
|
||||
concept_list = ["concept1"]
|
||||
|
||||
persona_class_double = double("PersonaClass") # rubocop:disable RSpec/VerifiedDoubles
|
||||
persona_instance_double = instance_double("DiscourseAi::Personas::Persona")
|
||||
bot_double = instance_double("DiscourseAi::Personas::Bot")
|
||||
|
||||
allow(AiPersona).to receive(:all_personas).and_return([persona_class_double])
|
||||
allow(persona_class_double).to receive(:id).and_return(SiteSetting.inferred_concepts_match_persona.to_i)
|
||||
allow(persona_class_double).to receive(:new).and_return(persona_instance_double)
|
||||
allow(persona_class_double).to receive(:default_llm_id).and_return(llm_model.id)
|
||||
allow(persona_instance_double).to receive(:class).and_return(persona_class_double)
|
||||
allow(LlmModel).to receive(:find).and_return(llm_model)
|
||||
allow(DiscourseAi::Personas::Bot).to receive(:as).and_return(bot_double)
|
||||
allow(bot_double).to receive(:reply).and_yield(nil, nil, :text)
|
||||
|
||||
result = applier.match_concepts_to_content(content, concept_list)
|
||||
expect(result).to eq([])
|
||||
end
|
||||
|
||||
it "returns empty array when no matching concepts found" do
  content = "This is about something else"
  concept_list = %w[programming testing]

  persona_class_double = double("PersonaClass") # rubocop:disable RSpec/VerifiedDoubles
  persona_instance_double = instance_double("DiscourseAi::Personas::Persona")
  bot_double = instance_double("DiscourseAi::Personas::Bot")
  structured_output_double = instance_double("DiscourseAi::Completions::StructuredOutput")

  allow(AiPersona).to receive(:all_personas).and_return([persona_class_double])
  allow(persona_class_double).to receive(:id).and_return(
    SiteSetting.inferred_concepts_match_persona.to_i,
  )
  allow(persona_class_double).to receive(:new).and_return(persona_instance_double)
  allow(persona_class_double).to receive(:default_llm_id).and_return(llm_model.id)
  allow(persona_instance_double).to receive(:class).and_return(persona_class_double)
  allow(LlmModel).to receive(:find).and_return(llm_model)
  allow(DiscourseAi::Personas::Bot).to receive(:as).and_return(bot_double)

  # Yield a structured output that reports no matches. Stubbing `reply`
  # with a plain return value never invokes the caller's block, which would
  # make this example identical to the "no structured output" case.
  allow(bot_double).to receive(:reply).and_yield(
    structured_output_double,
    nil,
    :structured_output,
  )
  allow(structured_output_double).to receive(:read_buffered_property).with(
    :matching_concepts,
  ).and_return([])

  result = applier.match_concepts_to_content(content, concept_list)

  expect(result).to eq([])
  expect(structured_output_double).to have_received(:read_buffered_property).with(
    :matching_concepts,
  )
end
|
||||
|
||||
it "handles missing matching_concepts key in response" do
  content = "Test content"
  concept_list = ["concept1"]

  persona_class_double = double("PersonaClass") # rubocop:disable RSpec/VerifiedDoubles
  persona_instance_double = instance_double("DiscourseAi::Personas::Persona")
  bot_double = instance_double("DiscourseAi::Personas::Bot")
  structured_output_double = instance_double("DiscourseAi::Completions::StructuredOutput")

  allow(AiPersona).to receive(:all_personas).and_return([persona_class_double])
  allow(persona_class_double).to receive(:id).and_return(
    SiteSetting.inferred_concepts_match_persona.to_i,
  )
  allow(persona_class_double).to receive(:new).and_return(persona_instance_double)
  allow(persona_class_double).to receive(:default_llm_id).and_return(llm_model.id)
  allow(persona_instance_double).to receive(:class).and_return(persona_class_double)
  allow(LlmModel).to receive(:find).and_return(llm_model)
  allow(DiscourseAi::Personas::Bot).to receive(:as).and_return(bot_double)

  # Simulate a structured output that was produced but never contained the
  # `matching_concepts` key: the property read yields nil. Stubbing `reply`
  # with a plain return value would never invoke the caller's block.
  # NOTE(review): assumes the applier falls back to [] when the property
  # read returns nil -- confirm against Applier#match_concepts_to_content.
  allow(bot_double).to receive(:reply).and_yield(
    structured_output_double,
    nil,
    :structured_output,
  )
  allow(structured_output_double).to receive(:read_buffered_property).with(
    :matching_concepts,
  ).and_return(nil)

  result = applier.match_concepts_to_content(content, concept_list)

  expect(result).to eq([])
end
|
||||
end
|
||||
end
|
269
spec/lib/inferred_concepts/finder_spec.rb
Normal file
269
spec/lib/inferred_concepts/finder_spec.rb
Normal file
@ -0,0 +1,269 @@
|
||||
# frozen_string_literal: true
|
||||
|
||||
RSpec.describe DiscourseAi::InferredConcepts::Finder do
|
||||
subject(:finder) { described_class.new }
|
||||
|
||||
fab!(:topic) { Fabricate(:topic, posts_count: 5, views: 200, like_count: 15) }
|
||||
fab!(:post) { Fabricate(:post, like_count: 10) }
|
||||
fab!(:concept1) { Fabricate(:inferred_concept, name: "programming") }
|
||||
fab!(:concept2) { Fabricate(:inferred_concept, name: "testing") }
|
||||
fab!(:llm_model) { Fabricate(:fake_model) }
|
||||
|
||||
before do
|
||||
SiteSetting.inferred_concepts_generate_persona = -1
|
||||
SiteSetting.inferred_concepts_deduplicate_persona = -1
|
||||
SiteSetting.inferred_concepts_enabled = true
|
||||
end
|
||||
|
||||
describe "#identify_concepts" do
|
||||
it "returns empty array for blank content" do
|
||||
expect(finder.identify_concepts("")).to eq([])
|
||||
expect(finder.identify_concepts(nil)).to eq([])
|
||||
end
|
||||
|
||||
it "uses ConceptFinder persona to identify concepts" do
|
||||
content = "This is about Ruby programming and testing"
|
||||
structured_output_double = instance_double("DiscourseAi::Completions::StructuredOutput")
|
||||
|
||||
# Mock the persona and bot interaction
|
||||
persona_class_double = double("PersonaClass") # rubocop:disable RSpec/VerifiedDoubles
|
||||
persona_instance_double = double("PersonaInstance") # rubocop:disable RSpec/VerifiedDoubles
|
||||
bot_double = instance_double("DiscourseAi::Personas::Bot")
|
||||
|
||||
allow(AiPersona).to receive(:all_personas).and_return([persona_class_double])
|
||||
allow(persona_class_double).to receive(:id).and_return(SiteSetting.inferred_concepts_generate_persona.to_i)
|
||||
allow(persona_class_double).to receive(:new).and_return(persona_instance_double)
|
||||
allow(persona_instance_double).to receive(:default_llm_id).and_return(llm_model.id)
|
||||
allow(LlmModel).to receive(:find).with(llm_model.id).and_return(llm_model)
|
||||
allow(DiscourseAi::Personas::Bot).to receive(:as).and_return(bot_double)
|
||||
allow(bot_double).to receive(:reply).and_yield(
|
||||
structured_output_double,
|
||||
nil,
|
||||
:structured_output,
|
||||
)
|
||||
allow(structured_output_double).to receive(:read_buffered_property).with(
|
||||
:concepts,
|
||||
).and_return(%w[ruby programming testing])
|
||||
|
||||
result = finder.identify_concepts(content)
|
||||
expect(result).to eq(%w[ruby programming testing])
|
||||
end
|
||||
|
||||
it "handles no structured output gracefully" do
|
||||
content = "Test content"
|
||||
|
||||
persona_class_double = double("PersonaClass") # rubocop:disable RSpec/VerifiedDoubles
|
||||
persona_instance_double = double("PersonaInstance") # rubocop:disable RSpec/VerifiedDoubles
|
||||
bot_double = instance_double("DiscourseAi::Personas::Bot")
|
||||
|
||||
allow(AiPersona).to receive(:all_personas).and_return([persona_class_double])
|
||||
allow(persona_class_double).to receive(:id).and_return(SiteSetting.inferred_concepts_generate_persona.to_i)
|
||||
allow(persona_class_double).to receive(:new).and_return(persona_instance_double)
|
||||
allow(persona_instance_double).to receive(:default_llm_id).and_return(llm_model.id)
|
||||
allow(LlmModel).to receive(:find).with(llm_model.id).and_return(llm_model)
|
||||
allow(DiscourseAi::Personas::Bot).to receive(:as).and_return(bot_double)
|
||||
allow(bot_double).to receive(:reply).and_yield(nil, nil, :text)
|
||||
|
||||
result = finder.identify_concepts(content)
|
||||
expect(result).to eq([])
|
||||
end
|
||||
end
|
||||
|
||||
describe "#create_or_find_concepts" do
  it "returns empty array for blank concept names" do
    [[], nil].each do |blank_names|
      expect(finder.create_or_find_concepts(blank_names)).to eq([])
    end
  end

  it "creates new concepts for new names" do
    fresh_names = %w[new_concept1 new_concept2]

    created = finder.create_or_find_concepts(fresh_names)

    expect(created.length).to eq(2)
    expect(created.map { |concept| concept.name }).to match_array(fresh_names)
    # The records must have been persisted as well as returned.
    expect(InferredConcept.where(name: fresh_names).count).to eq(2)
  end

  it "finds existing concepts" do
    # Both names belong to concepts fabricated at the top of this spec.
    found = finder.create_or_find_concepts(%w[programming testing])

    expect(found.length).to eq(2)
    expect(found).to include(concept1, concept2)
  end

  it "handles mix of new and existing concepts" do
    mixed_names = %w[programming new_concept]

    returned = finder.create_or_find_concepts(mixed_names)

    expect(returned.length).to eq(2)
    expect(returned.map { |concept| concept.name }).to match_array(mixed_names)
  end
end
|
||||
|
||||
describe "#find_candidate_topics" do
|
||||
let!(:good_topic) { Fabricate(:topic, posts_count: 6, views: 150, like_count: 12) }
|
||||
let!(:bad_topic) { Fabricate(:topic, posts_count: 2, views: 50, like_count: 2) }
|
||||
let!(:topic_with_concepts) do
|
||||
t = Fabricate(:topic, posts_count: 8, views: 200, like_count: 20)
|
||||
t.inferred_concepts << concept1
|
||||
t
|
||||
end
|
||||
|
||||
it "finds topics meeting minimum criteria" do
|
||||
candidates = finder.find_candidate_topics(min_posts: 5, min_views: 100, min_likes: 10)
|
||||
|
||||
expect(candidates).to include(good_topic)
|
||||
expect(candidates).not_to include(bad_topic)
|
||||
expect(candidates).not_to include(topic_with_concepts) # already has concepts
|
||||
end
|
||||
|
||||
it "respects limit parameter" do
|
||||
candidates = finder.find_candidate_topics(limit: 1)
|
||||
expect(candidates.length).to be <= 1
|
||||
end
|
||||
|
||||
it "excludes specified topic IDs" do
|
||||
candidates = finder.find_candidate_topics(exclude_topic_ids: [good_topic.id])
|
||||
expect(candidates).not_to include(good_topic)
|
||||
end
|
||||
|
||||
it "filters by category IDs when provided" do
|
||||
category = Fabricate(:category)
|
||||
topic_in_category =
|
||||
Fabricate(:topic, category: category, posts_count: 6, views: 150, like_count: 12)
|
||||
|
||||
candidates = finder.find_candidate_topics(category_ids: [category.id])
|
||||
|
||||
expect(candidates).to include(topic_in_category)
|
||||
expect(candidates).not_to include(good_topic)
|
||||
end
|
||||
|
||||
it "filters by creation date" do
|
||||
old_topic =
|
||||
Fabricate(:topic, posts_count: 6, views: 150, like_count: 12, created_at: 45.days.ago)
|
||||
|
||||
candidates = finder.find_candidate_topics(created_after: 30.days.ago)
|
||||
|
||||
expect(candidates).to include(good_topic)
|
||||
expect(candidates).not_to include(old_topic)
|
||||
end
|
||||
end
|
||||
|
||||
describe "#find_candidate_posts" do
|
||||
let!(:good_post) { Fabricate(:post, like_count: 8, post_number: 2) }
|
||||
let!(:bad_post) { Fabricate(:post, like_count: 2, post_number: 2) }
|
||||
let!(:first_post) { Fabricate(:post, like_count: 10, post_number: 1) }
|
||||
let!(:post_with_concepts) do
|
||||
p = Fabricate(:post, like_count: 15, post_number: 3)
|
||||
p.inferred_concepts << concept1
|
||||
p
|
||||
end
|
||||
|
||||
it "finds posts meeting minimum criteria" do
|
||||
candidates = finder.find_candidate_posts(min_likes: 5)
|
||||
|
||||
expect(candidates).to include(good_post)
|
||||
expect(candidates).not_to include(bad_post)
|
||||
expect(candidates).not_to include(post_with_concepts) # already has concepts
|
||||
end
|
||||
|
||||
it "excludes first posts by default" do
|
||||
candidates = finder.find_candidate_posts(min_likes: 5)
|
||||
|
||||
expect(candidates).not_to include(first_post)
|
||||
end
|
||||
|
||||
it "can include first posts when specified" do
|
||||
candidates = finder.find_candidate_posts(min_likes: 5, exclude_first_posts: false)
|
||||
|
||||
expect(candidates).to include(first_post)
|
||||
end
|
||||
|
||||
it "respects limit parameter" do
|
||||
candidates = finder.find_candidate_posts(limit: 1)
|
||||
expect(candidates.length).to be <= 1
|
||||
end
|
||||
|
||||
it "excludes specified post IDs" do
|
||||
candidates = finder.find_candidate_posts(exclude_post_ids: [good_post.id])
|
||||
expect(candidates).not_to include(good_post)
|
||||
end
|
||||
|
||||
it "filters by category IDs when provided" do
|
||||
category = Fabricate(:category)
|
||||
topic_in_category = Fabricate(:topic, category: category)
|
||||
post_in_category = Fabricate(:post, topic: topic_in_category, like_count: 8, post_number: 2)
|
||||
|
||||
candidates = finder.find_candidate_posts(category_ids: [category.id])
|
||||
|
||||
expect(candidates).to include(post_in_category)
|
||||
expect(candidates).not_to include(good_post)
|
||||
end
|
||||
|
||||
it "filters by creation date" do
|
||||
old_post = Fabricate(:post, like_count: 8, post_number: 2, created_at: 45.days.ago)
|
||||
|
||||
candidates = finder.find_candidate_posts(created_after: 30.days.ago)
|
||||
|
||||
expect(candidates).to include(good_post)
|
||||
expect(candidates).not_to include(old_post)
|
||||
end
|
||||
end
|
||||
|
||||
describe "#deduplicate_concepts" do
|
||||
it "returns empty result for blank concept names" do
|
||||
result = finder.deduplicate_concepts([])
|
||||
expect(result).to eq({ deduplicated_concepts: [], mapping: {} })
|
||||
|
||||
result = finder.deduplicate_concepts(nil)
|
||||
expect(result).to eq({ deduplicated_concepts: [], mapping: {} })
|
||||
end
|
||||
|
||||
it "uses ConceptDeduplicator persona to deduplicate concepts" do
|
||||
concept_names = ["ruby", "Ruby programming", "testing", "unit testing"]
|
||||
structured_output_double = instance_double("DiscourseAi::Completions::StructuredOutput")
|
||||
|
||||
persona_class_double = double("PersonaClass") # rubocop:disable RSpec/VerifiedDoubles
|
||||
persona_instance_double = double("PersonaInstance") # rubocop:disable RSpec/VerifiedDoubles
|
||||
bot_double = instance_double("DiscourseAi::Personas::Bot")
|
||||
|
||||
allow(AiPersona).to receive(:all_personas).and_return([persona_class_double])
|
||||
allow(persona_class_double).to receive(:id).and_return(SiteSetting.inferred_concepts_deduplicate_persona.to_i)
|
||||
allow(persona_class_double).to receive(:new).and_return(persona_instance_double)
|
||||
allow(persona_instance_double).to receive(:default_llm_id).and_return(llm_model.id)
|
||||
allow(LlmModel).to receive(:find).with(llm_model.id).and_return(llm_model)
|
||||
allow(DiscourseAi::Personas::Bot).to receive(:as).and_return(bot_double)
|
||||
allow(bot_double).to receive(:reply).and_yield(
|
||||
structured_output_double,
|
||||
nil,
|
||||
:structured_output,
|
||||
)
|
||||
allow(structured_output_double).to receive(:read_buffered_property).with(
|
||||
:streamlined_tags,
|
||||
).and_return(%w[ruby testing])
|
||||
|
||||
result = finder.deduplicate_concepts(concept_names)
|
||||
expect(result).to eq(%w[ruby testing])
|
||||
end
|
||||
|
||||
it "handles no structured output gracefully" do
|
||||
concept_names = %w[concept1 concept2]
|
||||
|
||||
persona_class_double = double("PersonaClass") # rubocop:disable RSpec/VerifiedDoubles
|
||||
persona_instance_double = double("PersonaInstance") # rubocop:disable RSpec/VerifiedDoubles
|
||||
bot_double = instance_double("DiscourseAi::Personas::Bot")
|
||||
|
||||
allow(AiPersona).to receive(:all_personas).and_return([persona_class_double])
|
||||
allow(persona_class_double).to receive(:id).and_return(SiteSetting.inferred_concepts_deduplicate_persona.to_i)
|
||||
allow(persona_class_double).to receive(:new).and_return(persona_instance_double)
|
||||
allow(persona_instance_double).to receive(:default_llm_id).and_return(llm_model.id)
|
||||
allow(LlmModel).to receive(:find).with(llm_model.id).and_return(llm_model)
|
||||
allow(DiscourseAi::Personas::Bot).to receive(:as).and_return(bot_double)
|
||||
allow(bot_double).to receive(:reply).and_yield(nil, nil, :text)
|
||||
|
||||
result = finder.deduplicate_concepts(concept_names)
|
||||
expect(result).to eq([])
|
||||
end
|
||||
end
|
||||
end
|
239
spec/lib/inferred_concepts/manager_spec.rb
Normal file
239
spec/lib/inferred_concepts/manager_spec.rb
Normal file
@ -0,0 +1,239 @@
|
||||
# frozen_string_literal: true
|
||||
|
||||
# Specs for DiscourseAi::InferredConcepts::Manager.
#
# Most examples replace the Finder and Applier collaborators with verified
# doubles and check that the manager forwards arguments and returns whatever
# the collaborator returned. The listing and search examples run against
# fabricated InferredConcept records instead.
RSpec.describe DiscourseAi::InferredConcepts::Manager do
  subject(:manager) { described_class.new }

  fab!(:topic)
  fab!(:post)
  fab!(:concept1) { Fabricate(:inferred_concept, name: "programming") }
  fab!(:concept2) { Fabricate(:inferred_concept, name: "testing") }

  describe "#list_concepts" do
    it "returns all concepts sorted by name" do
      concepts = manager.list_concepts
      expect(concepts).to include("programming", "testing")
      expect(concepts).to eq(concepts.sort)
    end

    it "respects limit parameter" do
      concepts = manager.list_concepts(limit: 1)
      expect(concepts.length).to eq(1)
    end

    it "returns empty array when no concepts exist" do
      InferredConcept.destroy_all
      concepts = manager.list_concepts
      expect(concepts).to eq([])
    end
  end

  describe "#generate_concepts_from_content" do
    before do
      SiteSetting.inferred_concepts_generate_persona = -1
      SiteSetting.inferred_concepts_enabled = true
    end

    it "returns empty array for blank content" do
      expect(manager.generate_concepts_from_content("")).to eq([])
      expect(manager.generate_concepts_from_content(nil)).to eq([])
    end

    it "delegates to Finder#identify_concepts" do
      content = "This is about Ruby programming"
      finder = instance_double(DiscourseAi::InferredConcepts::Finder)
      allow(DiscourseAi::InferredConcepts::Finder).to receive(:new).and_return(finder)

      allow(finder).to receive(:identify_concepts).with(content).and_return(%w[ruby programming])

      allow(finder).to receive(:create_or_find_concepts).with(%w[ruby programming]).and_return(
        [concept1],
      )

      result = manager.generate_concepts_from_content(content)
      expect(result).to eq([concept1])
    end
  end

  describe "#generate_concepts_from_topic" do
    it "returns empty array for blank topic" do
      expect(manager.generate_concepts_from_topic(nil)).to eq([])
    end

    it "extracts content and generates concepts" do
      applier = instance_double(DiscourseAi::InferredConcepts::Applier)
      allow(DiscourseAi::InferredConcepts::Applier).to receive(:new).and_return(applier)
      allow(applier).to receive(:topic_content_for_analysis).with(topic).and_return("topic content")

      # Mock the finder instead of stubbing subject
      finder = instance_double(DiscourseAi::InferredConcepts::Finder)
      allow(DiscourseAi::InferredConcepts::Finder).to receive(:new).and_return(finder)
      allow(finder).to receive(:identify_concepts).with("topic content").and_return(%w[programming])
      allow(finder).to receive(:create_or_find_concepts).with(%w[programming]).and_return(
        [concept1],
      )

      result = manager.generate_concepts_from_topic(topic)
      expect(result).to eq([concept1])
    end
  end

  describe "#generate_concepts_from_post" do
    it "returns empty array for blank post" do
      expect(manager.generate_concepts_from_post(nil)).to eq([])
    end

    it "extracts content and generates concepts" do
      applier = instance_double(DiscourseAi::InferredConcepts::Applier)
      allow(DiscourseAi::InferredConcepts::Applier).to receive(:new).and_return(applier)
      allow(applier).to receive(:post_content_for_analysis).with(post).and_return("post content")

      # Mock the finder instead of stubbing subject
      finder = instance_double(DiscourseAi::InferredConcepts::Finder)
      allow(DiscourseAi::InferredConcepts::Finder).to receive(:new).and_return(finder)
      allow(finder).to receive(:identify_concepts).with("post content").and_return(%w[testing])
      allow(finder).to receive(:create_or_find_concepts).with(%w[testing]).and_return([concept1])

      result = manager.generate_concepts_from_post(post)
      expect(result).to eq([concept1])
    end
  end

  describe "#match_topic_to_concepts" do
    it "returns empty array for blank topic" do
      expect(manager.match_topic_to_concepts(nil)).to eq([])
    end

    it "delegates to Applier#match_existing_concepts" do
      applier = instance_double(DiscourseAi::InferredConcepts::Applier)
      allow(DiscourseAi::InferredConcepts::Applier).to receive(:new).and_return(applier)

      allow(applier).to receive(:match_existing_concepts).with(topic).and_return([concept1])

      result = manager.match_topic_to_concepts(topic)
      expect(result).to eq([concept1])
    end
  end

  describe "#match_post_to_concepts" do
    it "returns empty array for blank post" do
      expect(manager.match_post_to_concepts(nil)).to eq([])
    end

    it "delegates to Applier#match_existing_concepts_for_post" do
      applier = instance_double(DiscourseAi::InferredConcepts::Applier)
      allow(DiscourseAi::InferredConcepts::Applier).to receive(:new).and_return(applier)

      allow(applier).to receive(:match_existing_concepts_for_post).with(post).and_return([concept1])

      result = manager.match_post_to_concepts(post)
      expect(result).to eq([concept1])
    end
  end

  describe "#search_topics_by_concept" do
    it "returns empty array for non-existent concept" do
      result = manager.search_topics_by_concept("nonexistent")
      expect(result).to eq([])
    end

    it "returns topics associated with concept" do
      concept1.topics << topic
      result = manager.search_topics_by_concept("programming")
      expect(result).to include(topic)
    end
  end

  describe "#search_posts_by_concept" do
    it "returns empty array for non-existent concept" do
      result = manager.search_posts_by_concept("nonexistent")
      expect(result).to eq([])
    end

    it "returns posts associated with concept" do
      concept1.posts << post
      result = manager.search_posts_by_concept("programming")
      expect(result).to include(post)
    end
  end

  describe "#match_content_to_concepts" do
    it "returns empty array when no concepts exist" do
      InferredConcept.destroy_all
      result = manager.match_content_to_concepts("some content")
      expect(result).to eq([])
    end

    it "delegates to Applier#match_concepts_to_content" do
      content = "programming content"
      existing_concepts = %w[programming testing]
      applier = instance_double(DiscourseAi::InferredConcepts::Applier)

      # Stub the relation so the concept names handed to the applier are fixed.
      all_double = instance_double(ActiveRecord::Relation)
      allow(InferredConcept).to receive(:all).and_return(all_double)
      allow(all_double).to receive(:pluck).with(:name).and_return(existing_concepts)

      allow(DiscourseAi::InferredConcepts::Applier).to receive(:new).and_return(applier)
      allow(applier).to receive(:match_concepts_to_content).with(
        content,
        existing_concepts,
      ).and_return(["programming"])

      result = manager.match_content_to_concepts(content)
      expect(result).to eq(["programming"])
    end
  end

  describe "#find_candidate_topics" do
    it "delegates to Finder#find_candidate_topics with options" do
      opts = { limit: 50, min_posts: 3 }
      finder = instance_double(DiscourseAi::InferredConcepts::Finder)
      allow(DiscourseAi::InferredConcepts::Finder).to receive(:new).and_return(finder)

      allow(finder).to receive(:find_candidate_topics).with(**opts).and_return([topic])

      result = manager.find_candidate_topics(opts)
      expect(result).to eq([topic])
    end
  end

  describe "#find_candidate_posts" do
    it "delegates to Finder#find_candidate_posts with options" do
      opts = { limit: 25, min_likes: 2 }
      finder = instance_double(DiscourseAi::InferredConcepts::Finder)
      allow(DiscourseAi::InferredConcepts::Finder).to receive(:new).and_return(finder)

      allow(finder).to receive(:find_candidate_posts).with(**opts).and_return([post])

      result = manager.find_candidate_posts(opts)
      expect(result).to eq([post])
    end
  end

  describe "#deduplicate_concepts_by_letter" do
    before do
      # Create test concepts
      %w[apple application banana berry cat car dog].each do |name|
        Fabricate(:inferred_concept, name: name)
      end
    end

    it "groups concepts by first letter and deduplicates" do
      finder = instance_double(DiscourseAi::InferredConcepts::Finder)
      allow(DiscourseAi::InferredConcepts::Finder).to receive(:new).and_return(finder)

      allow(finder).to receive(:deduplicate_concepts).and_return(%w[apple banana cat dog])

      allow(InferredConcept).to receive(:where).and_call_original
      allow(InferredConcept).to receive(:insert_all).and_call_original

      manager.deduplicate_concepts_by_letter

      # Without this check the example would pass even if the manager never
      # consulted the finder; call counts on `allow` are not verified.
      expect(finder).to have_received(:deduplicate_concepts).at_least(:once)
    end

    it "handles empty concept list" do
      InferredConcept.destroy_all
      expect { manager.deduplicate_concepts_by_letter }.not_to raise_error
    end
  end
end
|
32
spec/lib/personas/concept_deduplicator_spec.rb
Normal file
32
spec/lib/personas/concept_deduplicator_spec.rb
Normal file
@ -0,0 +1,32 @@
|
||||
# frozen_string_literal: true
|
||||
|
||||
# Specs for the ConceptDeduplicator persona: its default-enabled flag, the
# output-format fragments in its system prompt, and its response format.
RSpec.describe DiscourseAi::Personas::ConceptDeduplicator do
  subject(:persona) { described_class.new }

  describe ".default_enabled" do
    it "is disabled by default" do
      enabled = described_class.default_enabled

      expect(enabled).to eq(false)
    end
  end

  describe "#system_prompt" do
    it "specifies output format" do
      # All four fragments must appear somewhere in the prompt.
      expect(persona.system_prompt).to include(
        "<streamlined_tags>",
        "<o>",
        '"streamlined_tags": ["tag1", "tag3"]',
        "</o>",
      )
    end
  end

  describe "#response_format" do
    it "defines correct response format" do
      expected_format = [
        { "array_type" => "string", "key" => "streamlined_tags", "type" => "array" },
      ]

      expect(persona.response_format).to eq(expected_format)
    end
  end
end
|
63
spec/lib/personas/concept_finder_spec.rb
Normal file
63
spec/lib/personas/concept_finder_spec.rb
Normal file
@ -0,0 +1,63 @@
|
||||
# frozen_string_literal: true
|
||||
|
||||
# Specs for the ConceptFinder persona: its default-enabled flag, how existing
# concepts are included in the system prompt, and its response format.
RSpec.describe DiscourseAi::Personas::ConceptFinder do
  let(:persona) { described_class.new }

  describe ".default_enabled" do
    it "is disabled by default" do
      expect(described_class.default_enabled).to eq(false)
    end
  end

  describe "#system_prompt" do
    before do
      Fabricate(:inferred_concept, name: "programming")
      Fabricate(:inferred_concept, name: "testing")
      Fabricate(:inferred_concept, name: "ruby")
    end

    it "includes existing concepts when available" do
      prompt = persona.system_prompt

      # Assert on the fabricated names explicitly; iterating over
      # InferredConcept.all would pass vacuously if no rows existed.
      expect(prompt).to include("programming", "testing", "ruby")
    end

    it "handles empty existing concepts" do
      InferredConcept.destroy_all
      prompt = persona.system_prompt

      expect(prompt).not_to include("following concepts already exist")
      expect(prompt).to include("advanced concept tagging system")
    end

    it "limits existing concepts to 100" do
      manager = instance_double(DiscourseAi::InferredConcepts::Manager)
      allow(DiscourseAi::InferredConcepts::Manager).to receive(:new).and_return(manager)
      allow(manager).to receive(:list_concepts).with(limit: 100).and_return(%w[concept1 concept2])

      persona.system_prompt

      # The stub alone does not prove the call happened; verify it explicitly.
      expect(manager).to have_received(:list_concepts).with(limit: 100)
    end

    it "includes format instructions" do
      prompt = persona.system_prompt

      expect(prompt).to include("<o>")
      expect(prompt).to include('{"concepts": ["concept1", "concept2", "concept3"]}')
      expect(prompt).to include("</o>")
    end

    it "includes language preservation instruction" do
      prompt = persona.system_prompt

      expect(prompt).to include("original language of the text")
    end
  end

  describe "#response_format" do
    it "defines correct response format" do
      format = persona.response_format

      expect(format).to eq([{ "array_type" => "string", "key" => "concepts", "type" => "array" }])
    end
  end
end
|
36
spec/lib/personas/concept_matcher_spec.rb
Normal file
36
spec/lib/personas/concept_matcher_spec.rb
Normal file
@ -0,0 +1,36 @@
|
||||
# frozen_string_literal: true
|
||||
|
||||
# Specs for the ConceptMatcher persona: its default-enabled flag, the
# placeholder and output-format fragments in its system prompt, and its
# response format.
RSpec.describe DiscourseAi::Personas::ConceptMatcher do
  subject(:persona) { described_class.new }

  describe ".default_enabled" do
    it "is disabled by default" do
      enabled = described_class.default_enabled

      expect(enabled).to eq(false)
    end
  end

  describe "#system_prompt" do
    it "includes placeholder for concept list" do
      # The literal placeholder text is expected in the raw prompt.
      expect(persona.system_prompt).to include("{inferred_concepts}")
    end

    it "specifies output format" do
      expect(persona.system_prompt).to include(
        "matching_concepts",
        "<o>",
        '{"matching_concepts": ["concept1", "concept3", "concept5"]}',
        "</o>",
      )
    end
  end

  describe "#response_format" do
    it "defines correct response format" do
      expected_format = [
        { "array_type" => "string", "key" => "matching_concepts", "type" => "array" },
      ]

      expect(persona.response_format).to eq(expected_format)
    end
  end
end
|
@ -17,6 +17,7 @@ class TestPersona < DiscourseAi::Personas::Persona
|
||||
{participants}
|
||||
{time}
|
||||
{resource_url}
|
||||
{inferred_concepts}
|
||||
PROMPT
|
||||
end
|
||||
end
|
||||
@ -38,6 +39,7 @@ RSpec.describe DiscourseAi::Personas::Persona do
|
||||
end
|
||||
|
||||
let(:resource_url) { "https://path-to-resource" }
|
||||
let(:inferred_concepts) { %w[bulbassaur charmander squirtle].join(", ") }
|
||||
|
||||
let(:context) do
|
||||
DiscourseAi::Personas::BotContext.new(
|
||||
@ -47,6 +49,7 @@ RSpec.describe DiscourseAi::Personas::Persona do
|
||||
time: Time.zone.now,
|
||||
participants: topic_with_users.allowed_users.map(&:username).join(", "),
|
||||
resource_url: resource_url,
|
||||
inferred_concepts: inferred_concepts,
|
||||
)
|
||||
end
|
||||
|
||||
@ -66,6 +69,7 @@ RSpec.describe DiscourseAi::Personas::Persona do
|
||||
expect(system_message).to include("joe, jane")
|
||||
expect(system_message).to include(Time.zone.now.to_s)
|
||||
expect(system_message).to include(resource_url)
|
||||
expect(system_message).to include(inferred_concepts)
|
||||
|
||||
tools = rendered.tools
|
||||
|
||||
|
61
spec/models/inferred_concept_spec.rb
Normal file
61
spec/models/inferred_concept_spec.rb
Normal file
@ -0,0 +1,61 @@
|
||||
# frozen_string_literal: true
|
||||
|
||||
# Model specs for InferredConcept: name validations, the topic and post
# associations, and the presence of the basic attribute readers.
RSpec.describe InferredConcept do
  describe "validations" do
    it "requires a name" do
      nameless = described_class.new

      # be_valid runs the validations, which populates errors for the next check.
      expect(nameless).not_to be_valid
      expect(nameless.errors[:name]).to include("can't be blank")
    end

    it "requires unique names" do
      Fabricate(:inferred_concept, name: "ruby")
      duplicate = described_class.new(name: "ruby")

      expect(duplicate).not_to be_valid
      expect(duplicate.errors[:name]).to include("has already been taken")
    end

    it "is valid with a unique name" do
      expect(Fabricate(:inferred_concept, name: "programming")).to be_valid
    end
  end

  describe "associations" do
    fab!(:topic)
    fab!(:post)
    fab!(:concept) { Fabricate(:inferred_concept, name: "programming") }

    it "can be associated with topics" do
      concept.topics << topic

      # The association is readable from both sides.
      expect(concept.topics).to include(topic)
      expect(topic.inferred_concepts).to include(concept)
    end

    it "can be associated with posts" do
      concept.posts << post

      # The association is readable from both sides.
      expect(concept.posts).to include(post)
      expect(post.inferred_concepts).to include(concept)
    end

    it "can have multiple topics and posts" do
      other_topic = Fabricate(:topic)
      other_post = Fabricate(:post)

      concept.topics << [topic, other_topic]
      concept.posts << [post, other_post]

      expect(concept.topics.count).to eq(2)
      expect(concept.posts.count).to eq(2)
    end
  end

  describe "database constraints" do
    it "has the expected schema" do
      record = Fabricate(:inferred_concept)

      expect(record).to respond_to(:name, :created_at, :updated_at)
    end
  end
end
|
@ -19,7 +19,7 @@ RSpec.describe DiscourseAi::Admin::AiFeaturesController do
|
||||
get "/admin/plugins/discourse-ai/ai-features.json"
|
||||
|
||||
expect(response.status).to eq(200)
|
||||
expect(response.parsed_body["ai_features"].count).to eq(4)
|
||||
expect(response.parsed_body["ai_features"].count).to eq(5)
|
||||
end
|
||||
end
|
||||
|
||||
|
@ -29,7 +29,7 @@ RSpec.describe "Admin AI features configuration", type: :system, js: true do
|
||||
).to eq(I18n.t("discourse_ai.features.summarization.name"))
|
||||
|
||||
expect(ai_features_page).to have_configured_feature_items(1)
|
||||
expect(ai_features_page).to have_unconfigured_feature_items(3)
|
||||
expect(ai_features_page).to have_unconfigured_feature_items(4)
|
||||
end
|
||||
|
||||
it "lists the persona used for the corresponding AI feature" do
|
||||
|
Loading…
x
Reference in New Issue
Block a user