FEATURE: Automatic translation and localization of posts, topics, categories (#1376)

Related: https://github.com/discourse/discourse-translator/pull/310

This commit includes all the jobs and event hooks to localize posts, topics, and categories.

A few notes:
- `feature_name: "translation"` because the site setting is `ai-translation` and module is `Translation`
- we will switch to proper ai-feature in the near future, and can consider using the persona_user as `localization.localizer_user_id`
- keeping things flat within the module for now as we will be moving to ai-feature soon and have to rearrange
- Settings renamed/introduced are:
  - ai_translation_backfill_rate (0)
  - ai_translation_backfill_limit_to_public_content (true)
  - ai_translation_backfill_max_age_days (5)
  - ai_translation_verbose_logs (false)
This commit is contained in:
Natalie Tay 2025-05-29 17:28:06 +08:00 committed by GitHub
parent ad5c48d9ae
commit 373e2305d6
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
45 changed files with 2791 additions and 0 deletions

View File

@ -0,0 +1,51 @@
# frozen_string_literal: true
module Jobs
  # Detects the locale of a single post, localizes it into every supported
  # locale other than the detected one, and then publishes a `localized`
  # message on the topic's MessageBus channel.
  class DetectTranslatePost < ::Jobs::Base
    sidekiq_options retry: false

    # @param args [Hash] job arguments; only `:post_id` is read
    def execute(args)
      return if !SiteSetting.discourse_ai_enabled
      return if !SiteSetting.ai_translation_enabled
      return if args[:post_id].blank?

      post = Post.find_by(id: args[:post_id])
      return if post.blank? || post.raw.blank? || post.deleted_at.present?
      # `to_i` keeps a nil user_id from raising NoMethodError on `<=`; a nil id
      # is skipped the same way as non-positive ids.
      return if post.user_id.to_i <= 0

      if SiteSetting.ai_translation_backfill_limit_to_public_content
        topic = post.topic
        if topic.blank? || topic.category&.read_restricted? ||
             topic.archetype == Archetype.private_message
          return
        end
      end

      begin
        detected_locale = DiscourseAi::Translation::PostLocaleDetector.detect_locale(post)
      rescue FinalDestination::SSRFDetector::LookupFailedError
        # this job is non-critical
        # the backfill job will handle failures
        return
      end

      locales = SiteSetting.experimental_content_localization_supported_locales.split("|")
      return if locales.blank?

      locales.each do |locale|
        next if locale == detected_locale

        begin
          DiscourseAi::Translation::PostLocalizer.localize(post, locale)
        rescue FinalDestination::SSRFDetector::LookupFailedError
          # do nothing, there are too many sporadic lookup failures
        rescue => e
          DiscourseAi::Translation::VerboseLogger.log(
            "Failed to translate post #{post.id} to #{locale}: #{e.message}",
          )
        end
      end

      MessageBus.publish("/topic/#{post.topic_id}", type: :localized, id: post.id)
    end
  end
end

View File

@ -0,0 +1,49 @@
# frozen_string_literal: true
module Jobs
  # Detects the locale of a single topic, localizes it into every supported
  # locale other than the detected one, and then publishes a `localized`
  # message on the topic's MessageBus channel.
  class DetectTranslateTopic < ::Jobs::Base
    sidekiq_options retry: false

    # @param args [Hash] job arguments; only `:topic_id` is read
    def execute(args)
      return if !SiteSetting.discourse_ai_enabled
      return if !SiteSetting.ai_translation_enabled
      return if args[:topic_id].blank?

      topic = Topic.find_by(id: args[:topic_id])
      return if topic.blank? || topic.title.blank? || topic.deleted_at.present?
      # `to_i` keeps a nil user_id from raising NoMethodError on `<=`; a nil id
      # is skipped the same way as non-positive ids.
      return if topic.user_id.to_i <= 0

      if SiteSetting.ai_translation_backfill_limit_to_public_content
        # Same public-content rule as DetectTranslatePost: skip restricted
        # categories and private messages.
        if topic.category&.read_restricted? || topic.archetype == Archetype.private_message
          return
        end
      end

      begin
        detected_locale = DiscourseAi::Translation::TopicLocaleDetector.detect_locale(topic)
      rescue FinalDestination::SSRFDetector::LookupFailedError
        # this job is non-critical
        # the backfill job will handle failures
        return
      end

      locales = SiteSetting.experimental_content_localization_supported_locales.split("|")
      return if locales.blank?

      locales.each do |locale|
        next if locale == detected_locale

        begin
          DiscourseAi::Translation::TopicLocalizer.localize(topic, locale)
        rescue FinalDestination::SSRFDetector::LookupFailedError
          # do nothing, there are too many sporadic lookup failures
        rescue => e
          DiscourseAi::Translation::VerboseLogger.log(
            "Failed to translate topic #{topic.id} to #{locale}: #{e.message}",
          )
        end
      end

      # NOTE(review): `id: 1` is hard-coded, while the post job publishes the
      # post's own id and the client passes `data.id` to `triggerChangedPost`.
      # Presumably the first post's id is intended here — confirm.
      MessageBus.publish("/topic/#{topic.id}", type: :localized, id: 1)
    end
  end
end

View File

@ -0,0 +1,51 @@
# frozen_string_literal: true
module Jobs
  # Localizes categories in id order, one batch per run, into every supported
  # locale that has no CategoryLocalization yet. Re-enqueues itself for the
  # next batch when the current batch was full.
  class LocalizeCategories < ::Jobs::Base
    cluster_concurrency 1
    sidekiq_options retry: false

    BATCH_SIZE = 50

    # @param args [Hash] reads optional `:from_category_id` (first id of the batch)
    def execute(args)
      return unless SiteSetting.discourse_ai_enabled
      return unless SiteSetting.ai_translation_enabled

      target_locales = SiteSetting.experimental_content_localization_supported_locales.split("|")
      return if target_locales.blank?

      start_id = args[:from_category_id] || Category.order(:id).first&.id
      batch = Category.where("id >= ?", start_id).order(:id).limit(BATCH_SIZE)
      return if batch.empty?

      last_seen_id = nil
      batch.each do |category|
        restricted_and_skipped =
          SiteSetting.ai_translation_backfill_limit_to_public_content && category.read_restricted?
        localize_category(category, target_locales) unless restricted_and_skipped
        last_seen_id = category.id
      end

      # A full batch means there may be more categories after the last one seen.
      return unless batch.size == BATCH_SIZE
      Jobs.enqueue_in(10.seconds, :localize_categories, from_category_id: last_seen_id + 1)
    end

    private

    # Localizes one category into each locale that has no localization yet.
    def localize_category(category, target_locales)
      CategoryLocalization.transaction do
        target_locales.each do |locale|
          next if CategoryLocalization.exists?(category_id: category.id, locale: locale)

          begin
            DiscourseAi::Translation::CategoryLocalizer.localize(category, locale)
          rescue FinalDestination::SSRFDetector::LookupFailedError
            # do nothing, there are too many sporadic lookup failures
          rescue => e
            DiscourseAi::Translation::VerboseLogger.log(
              "Failed to translate category #{category.id} to #{locale}: #{e.message}",
            )
          end
        end
      end
    end
  end
end

View File

@ -0,0 +1,68 @@
# frozen_string_literal: true
module Jobs
  # For each supported locale, localizes a batch of posts whose locale is set,
  # differs from the target locale, and which have no localization for it yet.
  class LocalizePosts < ::Jobs::Base
    cluster_concurrency 1
    sidekiq_options retry: false

    BATCH_SIZE = 50

    # @param args [Hash] reads optional `:limit` (falls back to BATCH_SIZE)
    def execute(args)
      return unless SiteSetting.discourse_ai_enabled
      return unless SiteSetting.ai_translation_enabled

      target_locales = SiteSetting.experimental_content_localization_supported_locales.split("|")
      return if target_locales.blank?

      batch_limit = args[:limit] || BATCH_SIZE

      target_locales.each do |locale|
        candidates = posts_missing(locale).order(updated_at: :desc).limit(batch_limit)
        next if candidates.empty?

        candidates.each { |post| localize_post(post, locale) }

        DiscourseAi::Translation::VerboseLogger.log(
          "Translated #{candidates.size} posts to #{locale}",
        )
      end
    end

    private

    # Builds the scope of posts that still need a localization for `locale`.
    def posts_missing(locale)
      scope =
        Post
          .joins(
            "LEFT JOIN post_localizations pl ON pl.post_id = posts.id AND pl.locale = #{ActiveRecord::Base.connection.quote(locale)}",
          )
          .where(deleted_at: nil)
          .where("posts.user_id > 0")
          .where.not(raw: [nil, ""])
          .where.not(locale: nil)
          .where.not(locale: locale)
          .where("pl.id IS NULL")

      if SiteSetting.ai_translation_backfill_limit_to_public_content
        scope =
          scope
            .joins(:topic)
            .where(topics: { category_id: Category.where(read_restricted: false).select(:id) })
            .where.not(topics: { archetype: Archetype.private_message })
      end

      if SiteSetting.ai_translation_backfill_max_age_days > 0
        scope =
          scope.where(
            "posts.created_at > ?",
            SiteSetting.ai_translation_backfill_max_age_days.days.ago,
          )
      end

      scope
    end

    # Localizes one post; failures are swallowed or verbose-logged so the batch continues.
    def localize_post(post, locale)
      DiscourseAi::Translation::PostLocalizer.localize(post, locale)
    rescue FinalDestination::SSRFDetector::LookupFailedError
      # do nothing, there are too many sporadic lookup failures
    rescue => e
      DiscourseAi::Translation::VerboseLogger.log(
        "Failed to translate post #{post.id} to #{locale}: #{e.message}",
      )
    end
  end
end

View File

@ -0,0 +1,63 @@
# frozen_string_literal: true
module Jobs
  # For each supported locale, localizes a batch of topics whose locale is set,
  # differs from the target locale, and which have no localization for it yet.
  class LocalizeTopics < ::Jobs::Base
    cluster_concurrency 1
    sidekiq_options retry: false

    BATCH_SIZE = 50

    # @param args [Hash] reads optional `:limit` (falls back to BATCH_SIZE)
    def execute(args)
      return unless SiteSetting.discourse_ai_enabled
      return unless SiteSetting.ai_translation_enabled

      target_locales = SiteSetting.experimental_content_localization_supported_locales.split("|")
      return if target_locales.blank?

      batch_limit = args[:limit] || BATCH_SIZE

      target_locales.each do |locale|
        candidates = topics_missing(locale).order(updated_at: :desc).limit(batch_limit)
        next if candidates.empty?

        candidates.each { |topic| localize_topic(topic, locale) }

        DiscourseAi::Translation::VerboseLogger.log(
          "Translated #{candidates.size} topics to #{locale}",
        )
      end
    end

    private

    # Builds the scope of topics that still need a localization for `locale`.
    def topics_missing(locale)
      scope =
        Topic
          .joins(
            "LEFT JOIN topic_localizations tl ON tl.topic_id = topics.id AND tl.locale = #{ActiveRecord::Base.connection.quote(locale)}",
          )
          .where(deleted_at: nil)
          .where("topics.user_id > 0")
          .where.not(locale: nil)
          .where.not(locale: locale)
          .where("tl.id IS NULL")

      if SiteSetting.ai_translation_backfill_limit_to_public_content
        scope = scope.where(category_id: Category.where(read_restricted: false).select(:id))
      end

      if SiteSetting.ai_translation_backfill_max_age_days > 0
        scope =
          scope.where(
            "topics.created_at > ?",
            SiteSetting.ai_translation_backfill_max_age_days.days.ago,
          )
      end

      scope
    end

    # Localizes one topic; failures are swallowed or verbose-logged so the batch continues.
    def localize_topic(topic, locale)
      DiscourseAi::Translation::TopicLocalizer.localize(topic, locale)
    rescue FinalDestination::SSRFDetector::LookupFailedError
      # do nothing, there are too many sporadic lookup failures
    rescue => e
      DiscourseAi::Translation::VerboseLogger.log(
        "Failed to translate topic #{topic.id} to #{locale}: #{e.message}",
      )
    end
  end
end

View File

@ -0,0 +1,16 @@
# frozen_string_literal: true
module Jobs
  # Scheduled every 12 hours: enqueues the category localization job when
  # translation is enabled and at least one supported locale is configured.
  class CategoryLocalizationBackfill < ::Jobs::Scheduled
    every 12.hours
    cluster_concurrency 1

    def execute(args)
      return unless SiteSetting.discourse_ai_enabled && SiteSetting.ai_translation_enabled
      return if SiteSetting.experimental_content_localization_supported_locales.blank?

      Jobs.enqueue(:localize_categories)
    end
  end
end

View File

@ -0,0 +1,18 @@
# frozen_string_literal: true
module Jobs
  # Scheduled every 5 minutes: enqueues the post localization job with the
  # configured backfill rate as its limit. A rate of 0 disables the backfill.
  class PostLocalizationBackfill < ::Jobs::Scheduled
    every 5.minutes
    cluster_concurrency 1

    def execute(args)
      return unless SiteSetting.discourse_ai_enabled && SiteSetting.ai_translation_enabled
      return if SiteSetting.experimental_content_localization_supported_locales.blank?
      return if SiteSetting.ai_translation_backfill_rate == 0

      Jobs.enqueue(:localize_posts, limit: SiteSetting.ai_translation_backfill_rate)
    end
  end
end

View File

@ -0,0 +1,56 @@
# frozen_string_literal: true
module Jobs
  # Scheduled every 5 minutes: detects the locale of up to
  # `ai_translation_backfill_rate` posts that have no locale yet.
  class PostsLocaleDetectionBackfill < ::Jobs::Scheduled
    every 5.minutes
    sidekiq_options retry: false
    cluster_concurrency 1

    def execute(args)
      return if !SiteSetting.discourse_ai_enabled
      return if !SiteSetting.ai_translation_enabled
      return if SiteSetting.ai_translation_backfill_rate == 0

      posts =
        Post
          .where(locale: nil)
          .where(deleted_at: nil)
          .where("posts.user_id > 0")
          .where.not(raw: [nil, ""])

      if SiteSetting.ai_translation_backfill_limit_to_public_content
        # Subquery instead of `pluck`, as in the sibling localization jobs:
        # no extra round trip and no id list loaded into Ruby.
        public_categories = Category.where(read_restricted: false).select(:id)
        posts =
          posts
            .joins(:topic)
            .where(topics: { category_id: public_categories })
            .where(topics: { archetype: "regular" })
      end

      if SiteSetting.ai_translation_backfill_max_age_days > 0
        posts =
          posts.where(
            "posts.created_at > ?",
            SiteSetting.ai_translation_backfill_max_age_days.days.ago,
          )
      end

      posts = posts.order(updated_at: :desc).limit(SiteSetting.ai_translation_backfill_rate)
      return if posts.empty?

      posts.each do |post|
        begin
          DiscourseAi::Translation::PostLocaleDetector.detect_locale(post)
        rescue FinalDestination::SSRFDetector::LookupFailedError
          # do nothing, there are too many sporadic lookup failures
        rescue => e
          DiscourseAi::Translation::VerboseLogger.log(
            "Failed to detect post #{post.id}'s locale: #{e.message}",
          )
        end
      end

      DiscourseAi::Translation::VerboseLogger.log("Detected #{posts.size} post locales")
    end
  end
end

View File

@ -0,0 +1,18 @@
# frozen_string_literal: true
module Jobs
  # Scheduled every 5 minutes: enqueues the topic localization job with the
  # configured backfill rate as its limit. A rate of 0 disables the backfill.
  class TopicLocalizationBackfill < ::Jobs::Scheduled
    every 5.minutes
    cluster_concurrency 1

    def execute(args)
      return unless SiteSetting.discourse_ai_enabled && SiteSetting.ai_translation_enabled
      return if SiteSetting.experimental_content_localization_supported_locales.blank?
      return if SiteSetting.ai_translation_backfill_rate == 0

      Jobs.enqueue(:localize_topics, limit: SiteSetting.ai_translation_backfill_rate)
    end
  end
end

View File

@ -0,0 +1,47 @@
# frozen_string_literal: true
module Jobs
  # Scheduled every 5 minutes: detects the locale of up to
  # `ai_translation_backfill_rate` topics that have no locale yet.
  class TopicsLocaleDetectionBackfill < ::Jobs::Scheduled
    every 5.minutes
    sidekiq_options retry: false
    cluster_concurrency 1

    def execute(args)
      return unless SiteSetting.discourse_ai_enabled && SiteSetting.ai_translation_enabled

      batch_size = SiteSetting.ai_translation_backfill_rate
      return if batch_size == 0

      scope = Topic.where(locale: nil, deleted_at: nil).where("topics.user_id > 0")

      if SiteSetting.ai_translation_backfill_limit_to_public_content
        scope = scope.where(category_id: Category.where(read_restricted: false).select(:id))
      end

      if SiteSetting.ai_translation_backfill_max_age_days > 0
        scope =
          scope.where(
            "topics.created_at > ?",
            SiteSetting.ai_translation_backfill_max_age_days.days.ago,
          )
      end

      batch = scope.order(updated_at: :desc).limit(batch_size)
      return if batch.empty?

      batch.each { |topic| detect_locale_for(topic) }

      DiscourseAi::Translation::VerboseLogger.log("Detected #{batch.size} topic locales")
    end

    private

    # Detects one topic's locale; failures are swallowed or verbose-logged so the batch continues.
    def detect_locale_for(topic)
      DiscourseAi::Translation::TopicLocaleDetector.detect_locale(topic)
    rescue FinalDestination::SSRFDetector::LookupFailedError
      # do nothing, there are too many sporadic lookup failures
    rescue => e
      DiscourseAi::Translation::VerboseLogger.log(
        "Failed to detect topic #{topic.id}'s locale: #{e.message}",
      )
    end
  end
end

View File

@ -0,0 +1,24 @@
import { apiInitializer } from "discourse/lib/api";
import cookie from "discourse/lib/cookie";
// Registers a handler for "localized" post messages: unless the user has the
// "show original" cookie set, the changed post is re-fetched and the post
// stream is asked to refresh it.
export default apiInitializer((api) => {
  const siteSettings = api.container.lookup("service:site-settings");
  const translationEnabled =
    siteSettings.discourse_ai_enabled && siteSettings.ai_translation_enabled;

  if (!translationEnabled) {
    return;
  }

  const onLocalized = (topicController, data) => {
    if (cookie("content-localization-show-original")) {
      return;
    }

    const stream = topicController.get("model.postStream");
    stream.triggerChangedPost(data.id, data.updated_at).then(() => {
      topicController.appEvents.trigger("post-stream:refresh", {
        id: data.id,
      });
    });
  };

  api.registerCustomPostMessageCallback("localized", onLocalized);
});

View File

@ -401,3 +401,19 @@ discourse_ai:
allow_any: false
enum: "DiscourseAi::Configuration::LlmEnumerator"
validator: "DiscourseAi::Configuration::LlmValidator"
# Maximum number of posts/topics handled per backfill run; 0 disables the
# post/topic localization and locale-detection backfill jobs.
ai_translation_backfill_rate:
default: 0
client: false
hidden: true
# When true, translation jobs skip read-restricted categories and private messages.
ai_translation_backfill_limit_to_public_content:
default: true
client: false
hidden: true
# Backfill only considers posts/topics created within this many days;
# the age filter is applied only when the value is greater than 0.
ai_translation_backfill_max_age_days:
default: 5
client: false
hidden: true
# When true, DiscourseAi::Translation::VerboseLogger writes its messages to the Rails log.
ai_translation_verbose_logs:
default: false
client: false
hidden: true

View File

@ -0,0 +1,60 @@
# frozen_string_literal: true
module DiscourseAi
  module Translation
    # Base class for LLM-backed translators. Subclasses supply the system
    # prompt via `prompt_template`; this class builds the request and reads
    # the `translation` property from the structured response.
    class BaseTranslator
      # @param text [String] content to translate
      # @param target_language [String, Symbol] language/locale to translate into
      def initialize(text, target_language)
        @text = text
        @target_language = target_language
      end

      # Sends the content to the configured translation model.
      # @return the `translation` property of the structured output, or nil
      #   when the model returned no structured output
      def translate
        user_message = { type: :user, content: formatted_content, id: "user" }
        request = DiscourseAi::Completions::Prompt.new(prompt_template, messages: [user_message])

        llm = DiscourseAi::Completions::Llm.proxy(SiteSetting.ai_translation_model)
        reply =
          llm.generate(
            request,
            user: Discourse.system_user,
            feature_name: "translation",
            response_format: response_format,
          )

        reply&.read_buffered_property(:translation)
      end

      # @return [String] JSON payload carrying the content and target language
      def formatted_content
        payload = { content: @text, target_language: @target_language }
        payload.to_json
      end

      # @return [Hash] JSON-schema response format requiring a single
      #   string property named `translation`
      def response_format
        schema = {
          type: "object",
          properties: {
            translation: {
              type: "string",
            },
          },
          required: ["translation"],
          additionalProperties: false,
        }

        { type: "json_schema", json_schema: { name: "reply", schema: schema, strict: true } }
      end

      private

      # Subclasses must return the system prompt text.
      def prompt_template
        raise NotImplementedError
      end
    end
  end
end

View File

@ -0,0 +1,29 @@
# frozen_string_literal: true
module DiscourseAi
  module Translation
    # Translates a category's name and description and stores the result in
    # a CategoryLocalization row for the target locale.
    class CategoryLocalizer
      # @param category [Category] category to localize
      # @param target_locale [String, Symbol] locale to translate into
      # @return [CategoryLocalization, nil] the saved localization, or nil
      #   when the category or locale is blank
      # @raise whatever `save!` raises when the record cannot be saved
      def self.localize(category, target_locale = I18n.locale)
        return if category.blank? || target_locale.blank?

        target_locale_sym = target_locale.to_s.sub("-", "_").to_sym

        translated_name = ShortTextTranslator.new(category.name, target_locale_sym).translate

        # category descriptions are first paragraphs of posts
        translated_description =
          if category.description.present?
            PostRawTranslator.new(category.description, target_locale_sym).translate
          else
            # nothing to translate: skip the LLM call and keep the blank value
            category.description
          end

        localization =
          CategoryLocalization.find_or_initialize_by(
            category_id: category.id,
            locale: target_locale_sym.to_s,
          )

        localization.name = translated_name
        localization.description = translated_description
        localization.save!
        localization
      end
    end
  end
end

View File

@ -0,0 +1,109 @@
# frozen_string_literal: true
module DiscourseAi
  module Translation
    # Splits long content into consecutive chunks of roughly CHUNK_SIZE
    # characters, preferring to cut at HTML node ends, BBCode/code-fence block
    # ends, or text boundaries over cutting mid-text.
    class ContentSplitter
      CHUNK_SIZE = 3000

      BBCODE_PATTERNS = [
        %r{\[table.*?\].*?\[/table\]}m,
        %r{\[quote.*?\].*?\[/quote\]}m,
        %r{\[details.*?\].*?\[/details\]}m,
        %r{\<details.*?\>.*?\</details\>}m,
        %r{\[spoiler.*?\].*?\[/spoiler\]}m,
        %r{\[code.*?\].*?\[/code\]}m,
        /```.*?```/m,
      ].freeze

      TEXT_BOUNDARIES = [
        /\n\s*\n\s*|\r\n\s*\r\n\s*/, # double newlines with optional spaces
        /[.!?]\s+/, # sentence endings
        /[,;]\s+/, # clause endings
        /\n|\r\n/, # single newlines
        /\s+/, # any whitespace
      ].freeze

      # @param content [String, nil] text to split
      # @return [Array<String>] `[]` for nil, `[""]` for an empty string, the
      #   content itself when it fits in one chunk, otherwise consecutive
      #   chunks in order (a whitespace-only tail is not emitted)
      def self.split(content)
        return [] if content.nil?
        return [""] if content.empty?
        return [content] if content.length <= CHUNK_SIZE

        chunks = []
        remaining = content.dup

        while remaining.present?
          chunk = extract_mixed_chunk(remaining)
          break if chunk.empty?
          chunks << chunk
          remaining = remaining[chunk.length..-1]
        end

        chunks
      end

      # NOTE: `private` has no effect on `def self.` methods, so the helpers
      # below remain publicly callable; kept as in the original.
      private

      # Returns the leading chunk of `text`, cut at the first positive split
      # point (tried in strategy order) that is at most 1.5 * size.
      def self.extract_mixed_chunk(text, size: CHUNK_SIZE)
        return text if text.length <= size
        flexible_size = size * 1.5

        # try each splitting strategy in order
        strategies = [
          -> { find_nearest_html_end_index(text, size) },
          -> { find_nearest_bbcode_end_index(text, size) },
          -> { find_text_boundary(text, size) },
          -> { size },
        ]

        # A zero split point would yield an empty chunk, which makes `split`
        # stop and drop the rest of the content; require a positive position.
        split_point =
          strategies.lazy.filter_map(&:call).find { |pos| pos.positive? && pos <= flexible_size }

        text[0...split_point]
      end

      # Returns the cumulative serialized length at the end of the first
      # top-level HTML node that ends past `target_pos`, or nil.
      def self.find_nearest_html_end_index(text, target_pos)
        return nil if !text.include?("<")

        begin
          doc = Nokogiri::HTML5.fragment(text)
          current_length = 0

          doc.children.each do |node|
            html = node.to_html
            end_pos = current_length + html.length
            return end_pos if end_pos > target_pos
            current_length = end_pos
          end
          nil
        rescue Nokogiri::SyntaxError
          nil
        end
      end

      # Returns the end index of a BBCode/code-fence block that spans
      # `target_pos`, or nil when no such block exists.
      def self.find_nearest_bbcode_end_index(text, target_pos)
        BBCODE_PATTERNS.each do |pattern|
          text.scan(pattern) do |_|
            match = $~
            tag_start = match.begin(0)
            tag_end = match.end(0)
            return tag_end if tag_start <= target_pos && tag_end > target_pos
          end
        end
        nil
      end

      # Returns the position of the last text boundary starting at or before
      # `target_pos`, trying boundary kinds from strongest to weakest, or nil.
      def self.find_text_boundary(text, target_pos)
        TEXT_BOUNDARIES.each do |pattern|
          pos = text.rindex(pattern, target_pos)
          next if pos.nil?

          # Include all trailing whitespace
          pos += 1 while pos < text.length && text[pos].match?(/\s/)

          # A boundary at index 0 cannot produce a chunk; try a weaker boundary.
          return pos if pos.positive?
        end
        nil
      end
    end
  end
end

View File

@ -0,0 +1,27 @@
# frozen_string_literal: true
module DiscourseAi
  module Translation
    # Hooks translation jobs into plugin events.
    class EntryPoint
      # Registers event handlers on `plugin` that enqueue locale
      # detection/translation jobs while both the plugin and AI translation
      # are enabled.
      def inject_into(plugin)
        translation_on = -> do
          SiteSetting.discourse_ai_enabled && SiteSetting.ai_translation_enabled
        end

        plugin.on(:post_process_cooked) do |_, post|
          Jobs.enqueue(:detect_translate_post, post_id: post.id) if translation_on.call
        end

        plugin.on(:topic_created) do |topic|
          Jobs.enqueue(:detect_translate_topic, topic_id: topic.id) if translation_on.call
        end

        # Only re-run topic detection when the edit changed the topic itself.
        plugin.on(:post_edited) do |post, topic_changed|
          next unless translation_on.call && topic_changed
          Jobs.enqueue(:detect_translate_topic, topic_id: post.topic_id)
        end
      end
    end
  end
end

View File

@ -0,0 +1,86 @@
# frozen_string_literal: true
module DiscourseAi
  module Translation
    # Asks the configured translation model for the locale of a piece of text.
    class LanguageDetector
      # Maximum number of characters of the text sent for detection.
      DETECTION_CHAR_LIMIT = 1000

      PROMPT_TEXT = <<~TEXT
        You will be given a piece of text, and your task is to detect the locale (language) of the text and return it in a specific JSON format.
        To complete this task, follow these steps:
        1. Carefully read and analyze the provided text.
        2. Determine the language of the text based on its characteristics, such as vocabulary, grammar, and sentence structure.
        3. Do not use links or programing code in the text to detect the locale
        4. Identify the appropriate language code for the detected language.
        Here is a list of common language codes for reference:
        - English: en
        - Spanish: es
        - French: fr
        - German: de
        - Italian: it
        - Brazilian Portuguese: pt-BR
        - Russian: ru
        - Simplified Chinese: zh-CN
        - Japanese: ja
        - Korean: ko
        If the language is not in this list, use the appropriate IETF language tag code.
        5. Format your response as a JSON object with a single key "locale" and the value as the language code.
        Your output should be in the following format:
        <output>
        {"locale": "xx"}
        </output>
        Where "xx" is replaced by the appropriate language code.
        Important: Base your analysis solely on the provided text. Do not use any external information or make assumptions about the text's origin or context beyond what is explicitly provided.
      TEXT

      # @param text [String, nil] text whose language should be detected;
      #   only the first DETECTION_CHAR_LIMIT characters are kept
      def initialize(text)
        # Apply the declared limit so long posts are not sent in full.
        @text = text.nil? ? nil : text.to_s[0, DETECTION_CHAR_LIMIT]
      end

      # @return the `locale` property of the model's structured output, or
      #   nil when the model returned no structured output
      def detect
        prompt =
          DiscourseAi::Completions::Prompt.new(
            PROMPT_TEXT,
            messages: [{ type: :user, content: @text, id: "user" }],
          )

        structured_output =
          DiscourseAi::Completions::Llm.proxy(SiteSetting.ai_translation_model).generate(
            prompt,
            user: Discourse.system_user,
            feature_name: "translation",
            response_format: response_format,
          )

        structured_output&.read_buffered_property(:locale)
      end

      # @return [Hash] JSON-schema response format requiring a single string
      #   property named `locale`
      def response_format
        {
          type: "json_schema",
          json_schema: {
            name: "reply",
            schema: {
              type: "object",
              properties: {
                locale: {
                  type: "string",
                },
              },
              required: ["locale"],
              additionalProperties: false,
            },
            strict: true,
          },
        }
      end
    end
  end
end

View File

@ -0,0 +1,43 @@
# frozen_string_literal: true
module DiscourseAi
  module Translation
    # Maps locale strings onto the spelling used by I18n.available_locales.
    class LocaleNormalizer
      # Normalizes locale string, matching the list of I18n.locales where possible
      # @param locale [String,Symbol] the locale to normalize
      # @return [String, nil] the matching I18n locale, the underscored input
      #   when nothing matches, or nil for blank input
      def self.normalize_to_i18n(locale)
        return nil if locale.blank?

        underscored = locale.to_s.gsub("-", "_")
        i18n_pairs.fetch(underscored.downcase, underscored)
      end

      private

      # Memoized lookup from lowercased locale (and bare language code) to the
      # actual I18n locale, e.g.:
      # {
      #   "en" => "en",
      #   "zh_cn" => "zh_CN",
      #   "zh" => "zh_CN",
      # }
      def self.i18n_pairs
        @locale_map ||=
          I18n.available_locales.each_with_object({}) do |sym, lookup|
            name = sym.to_s
            lookup[name.downcase] = name
            next unless name.include?("_")

            # a regional locale also answers for its bare language code,
            # unless that code already has an entry
            language = name.split("_").first
            lookup[language] = name if lookup[language].blank?
          end
      end
    end
  end
end

View File

@ -0,0 +1,16 @@
# frozen_string_literal: true
module DiscourseAi
  module Translation
    # Detects a post's locale from its raw content and stores it on the post.
    class PostLocaleDetector
      # @param post [Post] post whose `raw` is analyzed
      # @return [String, nil] the normalized locale written to the post's
      #   `locale` column, or nil when the post is blank
      def self.detect_locale(post)
        return if post.blank?

        raw_guess = LanguageDetector.new(post.raw).detect
        LocaleNormalizer
          .normalize_to_i18n(raw_guess)
          .tap { |normalized| post.update_column(:locale, normalized) }
      end
    end
  end
end

View File

@ -0,0 +1,28 @@
# frozen_string_literal: true
module DiscourseAi
  module Translation
    # Translates a post's raw content chunk by chunk and stores the result in
    # a PostLocalization row for the target locale.
    class PostLocalizer
      # @param post [Post] post to localize
      # @param target_locale [String, Symbol] locale to translate into
      # @return [PostLocalization, nil] the saved localization, or nil when the
      #   post or locale is blank or the post is already in the target locale
      # @raise whatever `save!` raises when the record cannot be saved
      def self.localize(post, target_locale = I18n.locale)
        return if post.blank? || target_locale.blank?

        normalized_target = target_locale.to_s.sub("-", "_")
        # Compare normalized forms so "pt-BR" and "pt_BR" count as the same
        # locale and the post is not translated into its own language.
        return if post.locale.to_s.sub("-", "_") == normalized_target

        target_locale_sym = normalized_target.to_sym

        translated_raw =
          ContentSplitter
            .split(post.raw)
            .map { |chunk| PostRawTranslator.new(chunk, target_locale_sym).translate }
            .join("")

        localization =
          PostLocalization.find_or_initialize_by(post_id: post.id, locale: target_locale_sym.to_s)

        localization.raw = translated_raw
        localization.cooked = PrettyText.cook(translated_raw)
        localization.post_version = post.version
        localization.localizer_user_id = Discourse.system_user.id
        localization.save!
        localization
      end
    end
  end
end

View File

@ -0,0 +1,45 @@
# frozen_string_literal: true
module DiscourseAi
module Translation
# Translator for post raw content. Supplies the system prompt that
# BaseTranslator#translate sends to the model; the prompt asks the model to
# preserve Markdown/HTML structure and to answer as {"translation": "..."}.
class PostRawTranslator < BaseTranslator
PROMPT_TEMPLATE = <<~TEXT.freeze
You are a highly skilled translator tasked with translating content from one language to another. Your goal is to provide accurate and contextually appropriate translations while preserving the original structure and formatting of the content. Follow these instructions carefully:
Translation Instructions:
1. Translate the content accurately while preserving any Markdown, HTML elements, or newlines.
2. Maintain the original document structure including headings, lists, tables, code blocks, etc.
3. Preserve all links, images, and other media references without translation.
4. Handle code snippets appropriately:
- Do not translate variable names, functions, or syntax within code blocks (```).
- Translate comments within code blocks.
5. For technical terminology:
- Provide the accepted target language term if it exists.
- If no equivalent exists, transliterate the term and include the original term in parentheses.
6. For ambiguous terms or phrases, choose the most contextually appropriate translation.
7. Do not add any content besides the translation.
8. Ensure the translation only contains the original language and the target language.
Output your translation in the following JSON format:
{"translation": "Your TARGET_LANGUAGE translation here"}
Here are three examples of correct translations:
Original: {"content":"New Update for Minecraft Adds Underwater Temples", "target_language":"Spanish"}
Correct translation: {"translation": "Nueva actualización para Minecraft añade templos submarinos"}
Original: {"content": "# Machine Learning 101\n\nMachine Learning (ML) is a subset of Artificial Intelligence (AI) that focuses on the development of algorithms and statistical models that enable computer systems to improve their performance on a specific task through experience.\n\n## Key Concepts\n\n1. **Supervised Learning**: The algorithm learns from labeled training data.\n2. **Unsupervised Learning**: The algorithm finds patterns in unlabeled data.\n3. **Reinforcement Learning**: The algorithm learns through interaction with an environment.\n\n```python\n# Simple example of a machine learning model\nfrom sklearn.model_selection import train_test_split\nfrom sklearn.linear_model import LogisticRegression\n\n# Assuming X and y are your features and target variables\nX_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)\n\nmodel = LogisticRegression()\nmodel.fit(X_train, y_train)\n\n# Evaluate the model\naccuracy = model.score(X_test, y_test)\nprint(f'Model accuracy: {accuracy}')\n```\n\nFor more information, visit [Machine Learning on Wikipedia](https://en.wikipedia.org/wiki/Machine_learning).", "target_language":"French"}
Correct translation: {"translation": "# Machine Learning 101\n\nLe Machine Learning (ML) est un sous-ensemble de l'Intelligence Artificielle (IA) qui se concentre sur le développement d'algorithmes et de modèles statistiques permettant aux systèmes informatiques d'améliorer leurs performances sur une tâche spécifique grâce à l'expérience.\n\n## Concepts clés\n\n1. **Apprentissage supervisé** : L'algorithme apprend à partir de données d'entraînement étiquetées.\n2. **Apprentissage non supervisé** : L'algorithme trouve des motifs dans des données non étiquetées.\n3. **Apprentissage par renforcement** : L'algorithme apprend à travers l'interaction avec un environnement.\n\n```python\n# Exemple simple d'un modèle de machine learning\nfrom sklearn.model_selection import train_test_split\nfrom sklearn.linear_model import LogisticRegression\n\n# En supposant que X et y sont vos variables de caractéristiques et cibles\nX_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)\n\nmodel = LogisticRegression()\nmodel.fit(X_train, y_train)\n\n# Évaluer le modèle\naccuracy = model.score(X_test, y_test)\nprint(f'Model accuracy: {accuracy}')\n```\n\nPour plus d'informations, visitez [Machine Learning sur Wikipedia](https://en.wikipedia.org/wiki/Machine_learning)."}
Original: {"content": "**Heathrow fechado**: paralisação de voos deve continuar nos próximos dias, diz gestora do aeroporto de *Londres*", "target_language": "English"}
Correct translation: {"translation": "**Heathrow closed**: flight disruption expected to continue in coming days, says *London* airport management"}
Remember, you are being consumed via an API. Only return the translated text in the specified JSON format. Do not include any additional information or explanations in your response.
TEXT
# Returns the system prompt used for post content translation.
private def prompt_template
PROMPT_TEMPLATE
end
end
end
end

View File

@ -0,0 +1,40 @@
# frozen_string_literal: true
module DiscourseAi
  module Translation
    # Translator for short strings such as names, descriptions, or titles.
    # Supplies the system prompt that BaseTranslator#translate sends to the model.
    class ShortTextTranslator < BaseTranslator
      # The examples use the "content" key because that is the key
      # BaseTranslator#formatted_content actually sends.
      PROMPT_TEMPLATE = <<~TEXT.freeze
        You are a translation service specializing in translating short pieces of text or a few words.
        These words may be things like a name, description, or title. Adhere to the following guidelines:
        1. Keep proper nouns and technical terms in their original language
        2. Keep the translated content close to the original length
        3. Translation maintains the original meaning
        4. Preserving any Markdown, HTML elements, links, parenthesis, or newlines
        Provide your translation in the following JSON format:
        <output>
        {"translation": "target_language translation here"}
        </output>
        Here are three examples of correct translation
        Original: {"content":"Japan", "target_language":"Spanish"}
        Correct translation: {"translation": "Japón"}
        Original: {"content":"Cats and Dogs", "target_language":"Chinese"}
        Correct translation: {"translation": "猫和狗"}
        Original: {"content": "Q&A", "target_language": "Portuguese"}
        Correct translation: {"translation": "Perguntas e Respostas"}
        Remember to keep proper nouns like "Minecraft" and "Toyota" in their original form. Translate the text now and provide your answer in the specified JSON format.
      TEXT

      # Returns the system prompt used for short text translation.
      private def prompt_template
        PROMPT_TEMPLATE
      end
    end
  end
end

View File

@ -0,0 +1,19 @@
# frozen_string_literal: true
module DiscourseAi
  module Translation
    # Detects a topic's locale from its title plus the first post's raw
    # content (when available) and stores it on the topic.
    class TopicLocaleDetector
      # @param topic [Topic] topic to analyze
      # @return [String, nil] the normalized locale written to the topic's
      #   `locale` column, or nil when the topic is blank
      def self.detect_locale(topic)
        return if topic.blank?

        # `&.` tolerates a topic without a first post; `compact` then leaves
        # only the parts that exist.
        text = [topic.title, topic.first_post&.raw].compact.join(" ")

        detected_locale = LanguageDetector.new(text).detect
        locale = LocaleNormalizer.normalize_to_i18n(detected_locale)
        topic.update_column(:locale, locale)
        locale
      end
    end
  end
end

View File

@ -0,0 +1,29 @@
# frozen_string_literal: true
module DiscourseAi
  module Translation
    # Translates a topic's title and excerpt and stores the result in a
    # TopicLocalization row for the target locale.
    class TopicLocalizer
      # @param topic [Topic] topic to localize
      # @param target_locale [String, Symbol] locale to translate into
      # @return [TopicLocalization, nil] the saved localization, or nil when the
      #   topic or locale is blank or the topic is already in the target locale
      # @raise whatever `save!` raises when the record cannot be saved
      def self.localize(topic, target_locale = I18n.locale)
        return if topic.blank? || target_locale.blank?

        normalized_target = target_locale.to_s.sub("-", "_")
        # Compare normalized forms so "pt-BR" and "pt_BR" count as the same
        # locale and the topic is not translated into its own language.
        return if topic.locale.to_s.sub("-", "_") == normalized_target

        target_locale_sym = normalized_target.to_sym

        translated_title = TopicTitleTranslator.new(topic.title, target_locale_sym).translate
        translated_excerpt =
          if topic.excerpt.present?
            ShortTextTranslator.new(topic.excerpt, target_locale_sym).translate
          else
            # nothing to translate: skip the LLM call and keep the blank value
            topic.excerpt
          end

        localization =
          TopicLocalization.find_or_initialize_by(
            topic_id: topic.id,
            locale: target_locale_sym.to_s,
          )

        localization.title = translated_title
        localization.fancy_title = Topic.fancy_title(translated_title)
        localization.excerpt = translated_excerpt
        localization.localizer_user_id = Discourse.system_user.id
        localization.save!
        localization
      end
    end
  end
end

View File

@ -0,0 +1,47 @@
# frozen_string_literal: true
module DiscourseAi
module Translation
class TopicTitleTranslator < BaseTranslator
PROMPT_TEMPLATE = <<~TEXT.freeze
You are a translation service specializing in translating forum post titles from English to the asked target_language. Your task is to provide accurate and contextually appropriate translations while adhering to the following guidelines:
1. Translate the given title from English to target_language asked.
2. Keep proper nouns and technical terms in their original language.
3. Attempt to keep the translated title length close to the original when possible.
4. Ensure the translation maintains the original meaning and tone.
To complete this task:
1. Read and understand the title carefully.
2. Identify any proper nouns or technical terms that should remain untranslated.
3. Translate the remaining words and phrases into the target_language, ensuring the meaning is preserved.
4. Adjust the translation if necessary to keep the length similar to the original title.
5. Review your translation for accuracy and naturalness in the target_language.
Provide your translation in the following JSON format:
<output>
{"translation": "Your target_language translation here"}
</output>
Here are three examples of correct translation
Original: {"title":"New Update for Minecraft Adds Underwater Temples", "target_language":"Spanish"}
Correct translation: {"translation": "Nueva actualización para Minecraft añade templos submarinos"}
Original: {"title":"Toyota announces revolutionary battery technology", "target_language":"French"}
Correct translation: {"translation": "Toyota annonce une technologie de batteries révolutionnaire"}
Original: {"title": "Heathrow fechado: paralisação de voos deve continuar nos próximos dias, diz gestora do aeroporto de Londres", "target_language": "English"}
Correct translation: {"translation": "Heathrow closed: flight disruption expected to continue in coming days, says London airport management"}
Remember to keep proper nouns like "Minecraft" and "Toyota" in their original form. Translate the title now and provide your answer in the specified JSON format.
TEXT
# Returns the prompt text for this translator, i.e. the PROMPT_TEMPLATE
# constant. Kept private.
def prompt_template
  PROMPT_TEMPLATE
end
private :prompt_template
end
end
end

View File

@ -0,0 +1,13 @@
# frozen_string_literal: true
module DiscourseAi
  module Translation
    # Opt-in diagnostic logger for the translation feature.
    #
    # Messages are written to Rails.logger only while the
    # `ai_translation_verbose_logs` site setting is truthy; otherwise calls
    # are no-ops.
    class VerboseLogger
      # Logs +message+, prefixed with "DiscourseAi::Translation: ".
      #
      # @param message [#to_s] text to log
      # @param opts [Hash, nil] options; `:level` names the logger method to
      #   call (e.g. :warn, :info). Falls back to :warn when the key is
      #   missing or opts is nil, so a partial options hash no longer raises.
      # @return [Object, nil] whatever the logger method returns, or nil when
      #   verbose logging is disabled
      def self.log(message, opts = { level: :warn })
        return unless SiteSetting.ai_translation_verbose_logs

        level = (opts || {})[:level] || :warn
        # public_send: only the logger's public API should be reachable here.
        Rails.logger.public_send(level, "DiscourseAi::Translation: #{message}")
      end
    end
  end
end

View File

@ -100,6 +100,7 @@ after_initialize do
DiscourseAi::Summarization::EntryPoint.new,
DiscourseAi::AiBot::EntryPoint.new,
DiscourseAi::AiModeration::EntryPoint.new,
DiscourseAi::Translation::EntryPoint.new,
].each { |a_module| a_module.inject_into(self) }
register_problem_check ProblemCheck::AiLlmStatus

View File

@ -0,0 +1,88 @@
# frozen_string_literal: true
# Specs for Jobs::DetectTranslatePost. PostLocaleDetector.detect_locale and
# PostLocalizer.localize are mocked, so the examples only check whether (and
# with which arguments) the job calls them.
describe Jobs::DetectTranslatePost do
  fab!(:post)

  subject(:job) { described_class.new }

  let(:locales) { %w[en ja] }

  # Enable the plugin and the translation feature, select a fabricated fake
  # LLM as the translation model, and advertise `locales` as supported.
  before do
    SiteSetting.discourse_ai_enabled = true
    Fabricate(:fake_model).tap do |fake_llm|
      SiteSetting.public_send("ai_translation_model=", "custom:#{fake_llm.id}")
    end
    SiteSetting.ai_translation_enabled = true
    SiteSetting.experimental_content_localization_supported_locales = locales.join("|")
  end

  it "does nothing when translator is disabled" do
    SiteSetting.discourse_ai_enabled = false
    DiscourseAi::Translation::PostLocaleDetector.expects(:detect_locale).never
    DiscourseAi::Translation::PostLocalizer.expects(:localize).never

    job.execute({ post_id: post.id })
  end

  it "does nothing when content translation is disabled" do
    SiteSetting.ai_translation_enabled = false
    DiscourseAi::Translation::PostLocaleDetector.expects(:detect_locale).never
    DiscourseAi::Translation::PostLocalizer.expects(:localize).never

    job.execute({ post_id: post.id })
  end

  it "detects locale" do
    SiteSetting.discourse_ai_enabled = true
    # The mocked detector returns nil, so neither supported locale equals the
    # detected one and both are expected to be localized.
    DiscourseAi::Translation::PostLocaleDetector.expects(:detect_locale).with(post).once
    DiscourseAi::Translation::PostLocalizer.expects(:localize).twice

    job.execute({ post_id: post.id })
  end

  it "skips bot posts" do
    post.update!(user: Discourse.system_user)
    DiscourseAi::Translation::PostLocalizer.expects(:localize).never

    job.execute({ post_id: post.id })
  end

  it "does not translate when no target languages are configured" do
    SiteSetting.experimental_content_localization_supported_locales = ""
    DiscourseAi::Translation::PostLocaleDetector.expects(:detect_locale).with(post).returns("en")
    DiscourseAi::Translation::PostLocalizer.expects(:localize).never

    job.execute({ post_id: post.id })
  end

  it "skips translating to the post's language" do
    post.update(locale: "en")
    DiscourseAi::Translation::PostLocaleDetector.expects(:detect_locale).with(post).returns("en")
    DiscourseAi::Translation::PostLocalizer.expects(:localize).with(post, "en").never
    DiscourseAi::Translation::PostLocalizer.expects(:localize).with(post, "ja").once

    job.execute({ post_id: post.id })
  end

  it "handles translation errors gracefully" do
    post.update(locale: "en")
    DiscourseAi::Translation::PostLocaleDetector.expects(:detect_locale).with(post).returns("en")
    DiscourseAi::Translation::PostLocalizer.expects(:localize).raises(
      StandardError.new("API error"),
    )

    expect { job.execute({ post_id: post.id }) }.not_to raise_error
  end

  # Title fixed: the example asserts that NON-public content (a post in a
  # read-restricted category, then a post in a private message) is skipped.
  it "skips non-public content when `ai_translation_backfill_limit_to_public_content` site setting is enabled" do
    SiteSetting.ai_translation_backfill_limit_to_public_content = true
    post.topic.category.update!(read_restricted: true)

    DiscourseAi::Translation::PostLocaleDetector.expects(:detect_locale).with(post).never
    DiscourseAi::Translation::PostLocalizer.expects(:localize).never

    job.execute({ post_id: post.id })

    pm_post = Fabricate(:post, topic: Fabricate(:private_message_topic))
    job.execute({ post_id: pm_post.id })
  end
end

View File

@ -0,0 +1,85 @@
# frozen_string_literal: true
# Specs for Jobs::DetectTranslateTopic. TopicLocaleDetector.detect_locale and
# TopicLocalizer.localize are mocked, so the examples only check whether (and
# with which arguments) the job calls them.
describe Jobs::DetectTranslateTopic do
  fab!(:topic)

  subject(:job) { described_class.new }

  let(:locales) { %w[en ja] }

  # Enable the plugin and the translation feature, select a fabricated fake
  # LLM as the translation model, and advertise `locales` as supported.
  before do
    SiteSetting.discourse_ai_enabled = true
    Fabricate(:fake_model).tap do |fake_llm|
      SiteSetting.public_send("ai_translation_model=", "custom:#{fake_llm.id}")
    end
    SiteSetting.ai_translation_enabled = true
    SiteSetting.experimental_content_localization_supported_locales = locales.join("|")
  end

  it "does nothing when translator is disabled" do
    SiteSetting.discourse_ai_enabled = false
    DiscourseAi::Translation::TopicLocaleDetector.expects(:detect_locale).never
    DiscourseAi::Translation::TopicLocalizer.expects(:localize).never

    job.execute({ topic_id: topic.id })
  end

  it "does nothing when content translation is disabled" do
    SiteSetting.ai_translation_enabled = false
    DiscourseAi::Translation::TopicLocaleDetector.expects(:detect_locale).never
    DiscourseAi::Translation::TopicLocalizer.expects(:localize).never

    job.execute({ topic_id: topic.id })
  end

  it "detects locale" do
    SiteSetting.discourse_ai_enabled = true
    # The mocked detector returns nil, so both supported locales are expected
    # to be localized.
    DiscourseAi::Translation::TopicLocaleDetector.expects(:detect_locale).with(topic).once
    DiscourseAi::Translation::TopicLocalizer.expects(:localize).twice

    job.execute({ topic_id: topic.id })
  end

  it "skips bot topics" do
    topic.update!(user: Discourse.system_user)
    DiscourseAi::Translation::TopicLocalizer.expects(:localize).never

    job.execute({ topic_id: topic.id })
  end

  it "does not translate when no target languages are configured" do
    SiteSetting.experimental_content_localization_supported_locales = ""
    DiscourseAi::Translation::TopicLocaleDetector.expects(:detect_locale).with(topic).returns("en")
    DiscourseAi::Translation::TopicLocalizer.expects(:localize).never

    job.execute({ topic_id: topic.id })
  end

  it "skips translating to the topic's language" do
    topic.update(locale: "en")
    DiscourseAi::Translation::TopicLocaleDetector.expects(:detect_locale).with(topic).returns("en")
    DiscourseAi::Translation::TopicLocalizer.expects(:localize).with(topic, "en").never
    DiscourseAi::Translation::TopicLocalizer.expects(:localize).with(topic, "ja").once

    job.execute({ topic_id: topic.id })
  end

  it "handles translation errors gracefully" do
    topic.update(locale: "en")
    DiscourseAi::Translation::TopicLocaleDetector.expects(:detect_locale).with(topic).returns("en")
    DiscourseAi::Translation::TopicLocalizer.expects(:localize).raises(
      StandardError.new("API error"),
    )

    expect { job.execute({ topic_id: topic.id }) }.not_to raise_error
  end

  # Title fixed: the example asserts that NON-public content (a topic in a
  # read-restricted category) is skipped.
  it "skips non-public content when `ai_translation_backfill_limit_to_public_content` site setting is enabled" do
    SiteSetting.ai_translation_backfill_limit_to_public_content = true
    topic.category.update!(read_restricted: true)

    DiscourseAi::Translation::TopicLocaleDetector.expects(:detect_locale).never
    DiscourseAi::Translation::TopicLocalizer.expects(:localize).never

    job.execute({ topic_id: topic.id })
  end
end

View File

@ -0,0 +1,153 @@
# frozen_string_literal: true
# Specs for Jobs::LocalizeCategories. CategoryLocalizer.localize is mocked, so
# the examples only verify which (category, locale) pairs the job requests.
describe Jobs::LocalizeCategories do
  subject(:job) { described_class.new }

  # Fabricates a placeholder localization for every existing category in each
  # given locale, so that pre-existing categories are already "translated" and
  # only categories created afterwards remain to be localized.
  def localize_all_categories(*locales)
    Category.all.each do |category|
      locales.each { |locale| Fabricate(:category_localization, category:, locale:, name: "x") }
    end
  end

  # Enable the plugin and the translation feature, select a fabricated fake
  # LLM as the translation model, target pt and zh_CN, and run jobs inline.
  before do
    SiteSetting.discourse_ai_enabled = true
    Fabricate(:fake_model).tap do |fake_llm|
      SiteSetting.public_send("ai_translation_model=", "custom:#{fake_llm.id}")
    end
    SiteSetting.ai_translation_enabled = true
    SiteSetting.experimental_content_localization_supported_locales = "pt|zh_CN"
    Jobs.run_immediately!
  end

  it "does nothing when DiscourseAi::Translation::CategoryLocalizer is disabled" do
    SiteSetting.discourse_ai_enabled = false
    DiscourseAi::Translation::CategoryLocalizer.expects(:localize).never

    job.execute({})
  end

  it "does nothing when ai_translation_enabled is disabled" do
    SiteSetting.ai_translation_enabled = false
    DiscourseAi::Translation::CategoryLocalizer.expects(:localize).never

    job.execute({})
  end

  it "does nothing when no target languages are configured" do
    SiteSetting.experimental_content_localization_supported_locales = ""
    DiscourseAi::Translation::CategoryLocalizer.expects(:localize).never

    job.execute({})
  end

  it "does nothing when no categories exist" do
    Category.destroy_all
    DiscourseAi::Translation::CategoryLocalizer.expects(:localize).never

    job.execute({})
  end

  it "translates categories to the configured locales" do
    number_of_categories = Category.count

    DiscourseAi::Translation::CategoryLocalizer
      .expects(:localize)
      .with(is_a(Category), "pt")
      .times(number_of_categories)
    DiscourseAi::Translation::CategoryLocalizer
      .expects(:localize)
      .with(is_a(Category), "zh_CN")
      .times(number_of_categories)

    job.execute({})
  end

  it "skips categories that already have localizations" do
    localize_all_categories("pt", "zh_CN")
    category1 =
      Fabricate(:category, name: "First Category", description: "First category description")
    Fabricate(:category_localization, category: category1, locale: "pt", name: "Primeira Categoria")

    # It should only translate to Chinese, not Portuguese
    DiscourseAi::Translation::CategoryLocalizer.expects(:localize).with(category1, "pt").never
    DiscourseAi::Translation::CategoryLocalizer.expects(:localize).with(category1, "zh_CN").once

    job.execute({})
  end

  it "continues from a specified category ID" do
    category1 = Fabricate(:category, name: "First", description: "First description")
    category2 = Fabricate(:category, name: "Second", description: "Second description")

    DiscourseAi::Translation::CategoryLocalizer
      .expects(:localize)
      .with(category1, any_parameters)
      .never
    DiscourseAi::Translation::CategoryLocalizer
      .expects(:localize)
      .with(category2, any_parameters)
      .twice

    job.execute(from_category_id: category2.id)
  end

  it "handles translation errors gracefully" do
    localize_all_categories("pt", "zh_CN")
    category1 = Fabricate(:category, name: "First", description: "First description")

    # The pt attempt raises; zh_CN must still be attempted and nothing may
    # propagate out of the job.
    DiscourseAi::Translation::CategoryLocalizer
      .expects(:localize)
      .with(category1, "pt")
      .raises(StandardError.new("API error"))
    DiscourseAi::Translation::CategoryLocalizer.expects(:localize).with(category1, "zh_CN").once

    expect { job.execute({}) }.not_to raise_error
  end

  it "enqueues the next batch when there are more categories" do
    Jobs.run_later!
    freeze_time

    # Temporarily shrink BATCH_SIZE to 1. The original value is captured and
    # restored in `ensure`, so a failing expectation cannot leak the override
    # into later examples; removing the constant first avoids Ruby's
    # "already initialized constant" warning.
    original_batch_size = Jobs::LocalizeCategories::BATCH_SIZE
    Jobs::LocalizeCategories.send(:remove_const, :BATCH_SIZE)
    Jobs::LocalizeCategories.const_set(:BATCH_SIZE, 1)

    begin
      job.execute({})

      Category.all.each do |category|
        expect_job_enqueued(
          job: :localize_categories,
          args: {
            from_category_id: category.id + 1,
          },
          at: 10.seconds.from_now,
        )
      end
    ensure
      Jobs::LocalizeCategories.send(:remove_const, :BATCH_SIZE)
      Jobs::LocalizeCategories.const_set(:BATCH_SIZE, original_batch_size)
    end
  end

  it "skips read-restricted categories when configured" do
    SiteSetting.ai_translation_backfill_limit_to_public_content = true

    category1 = Fabricate(:category, name: "Public Category", read_restricted: false)
    category2 = Fabricate(:category, name: "Private Category", read_restricted: true)

    # Catch-all for any other categories; it is declared first so the more
    # specific expectations below take precedence when they match.
    DiscourseAi::Translation::CategoryLocalizer.expects(:localize).at_least_once
    DiscourseAi::Translation::CategoryLocalizer
      .expects(:localize)
      .with(category1, any_parameters)
      .twice
    DiscourseAi::Translation::CategoryLocalizer
      .expects(:localize)
      .with(category2, any_parameters)
      .never

    job.execute({})
  end
end

View File

@ -0,0 +1,193 @@
# frozen_string_literal: true
# Specs for Jobs::LocalizePosts. PostLocalizer.localize is mocked or stubbed
# in every example, so these specs only verify which (post, locale) pairs the
# job asks to localize, not the translations themselves.
describe Jobs::LocalizePosts do
fab!(:post)
subject(:job) { described_class.new }
let(:locales) { %w[en ja de] }
# Enable the plugin and the translation feature, select a fabricated fake LLM
# as the translation model, and advertise `locales` as supported.
before do
SiteSetting.discourse_ai_enabled = true
Fabricate(:fake_model).tap do |fake_llm|
SiteSetting.public_send("ai_translation_model=", "custom:#{fake_llm.id}")
end
SiteSetting.ai_translation_enabled = true
SiteSetting.experimental_content_localization_supported_locales = locales.join("|")
end
it "does nothing when translator is disabled" do
SiteSetting.discourse_ai_enabled = false
DiscourseAi::Translation::PostLocalizer.expects(:localize).never
job.execute({})
end
it "does nothing when ai_translation_enabled is disabled" do
SiteSetting.ai_translation_enabled = false
DiscourseAi::Translation::PostLocalizer.expects(:localize).never
job.execute({})
end
it "does nothing when no target languages are configured" do
SiteSetting.experimental_content_localization_supported_locales = ""
DiscourseAi::Translation::PostLocalizer.expects(:localize).never
job.execute({})
end
it "does nothing when there are no posts to translate" do
Post.destroy_all
DiscourseAi::Translation::PostLocalizer.expects(:localize).never
job.execute({})
end
# NOTE(review): only en and ja localizations are fabricated although de is
# also a supported locale; presumably this passes because the fabricated
# posts have no locale set (see scenario 1 below) — confirm.
it "skips posts that already have localizations" do
Post.all.each do |post|
Fabricate(:post_localization, post:, locale: "en")
Fabricate(:post_localization, post:, locale: "ja")
end
DiscourseAi::Translation::PostLocalizer.expects(:localize).never
job.execute({})
end
# "Bot" here means a post authored by Discourse.system_user.
it "skips bot posts" do
post.update!(user: Discourse.system_user)
DiscourseAi::Translation::PostLocalizer.expects(:localize).with(post, "en").never
DiscourseAi::Translation::PostLocalizer.expects(:localize).with(post, "ja").never
job.execute({})
end
# The en attempt raises; ja and de must still be attempted and nothing may
# propagate out of the job.
it "handles translation errors gracefully" do
post.update(locale: "es")
DiscourseAi::Translation::PostLocalizer
.expects(:localize)
.with(post, "en")
.raises(StandardError.new("API error"))
DiscourseAi::Translation::PostLocalizer.expects(:localize).with(post, "ja").once
DiscourseAi::Translation::PostLocalizer.expects(:localize).with(post, "de").once
expect { job.execute({}) }.not_to raise_error
end
# Expects one VerboseLogger summary line per supported locale.
it "logs a summary after translation" do
post.update(locale: "es")
DiscourseAi::Translation::PostLocalizer.stubs(:localize)
DiscourseAi::Translation::VerboseLogger.expects(:log).with(includes("Translated 1 posts to en"))
DiscourseAi::Translation::VerboseLogger.expects(:log).with(includes("Translated 1 posts to ja"))
DiscourseAi::Translation::VerboseLogger.expects(:log).with(includes("Translated 1 posts to de"))
job.execute({})
end
# Each scenario combines a post locale with a set of existing localizations
# and pins exactly which target locales are requested.
context "for translation scenarios" do
# Relies on the outer fabricated `post`, whose locale is not assigned in
# this example.
it "scenario 1: skips post when locale is not set" do
DiscourseAi::Translation::PostLocalizer.expects(:localize).never
job.execute({})
end
it "scenario 2: returns post with locale 'es' if localizations for en/ja/de do not exist" do
post = Fabricate(:post, locale: "es")
DiscourseAi::Translation::PostLocalizer.expects(:localize).with(post, "en").once
DiscourseAi::Translation::PostLocalizer.expects(:localize).with(post, "ja").once
DiscourseAi::Translation::PostLocalizer.expects(:localize).with(post, "de").once
job.execute({})
end
it "scenario 3: returns post with locale 'en' if ja/de localization does not exist" do
post = Fabricate(:post, locale: "en")
DiscourseAi::Translation::PostLocalizer.expects(:localize).with(post, "ja").once
DiscourseAi::Translation::PostLocalizer.expects(:localize).with(post, "de").once
DiscourseAi::Translation::PostLocalizer.expects(:localize).with(post, "en").never
job.execute({})
end
it "scenario 4: skips post with locale 'en' if 'ja' localization already exists" do
post = Fabricate(:post, locale: "en")
Fabricate(:post_localization, post: post, locale: "ja")
DiscourseAi::Translation::PostLocalizer.expects(:localize).with(post, "en").never
DiscourseAi::Translation::PostLocalizer.expects(:localize).with(post, "ja").never
DiscourseAi::Translation::PostLocalizer.expects(:localize).with(post, "de").once
job.execute({})
end
end
# Fixtures: a post in a topic of a :private_category fabricated for the staff
# group, a post in a private-message topic, and a post in a default
# fabricated topic — all with locale "es". Only "ja" is a target locale here.
describe "with public content limitation" do
fab!(:private_category) { Fabricate(:private_category, group: Group[:staff]) }
fab!(:private_topic) { Fabricate(:topic, category: private_category) }
fab!(:private_post) { Fabricate(:post, topic: private_topic, locale: "es") }
fab!(:pm_post) { Fabricate(:post, topic: Fabricate(:private_message_topic), locale: "es") }
fab!(:public_post) { Fabricate(:post, locale: "es") }
before do
SiteSetting.ai_translation_backfill_limit_to_public_content = true
SiteSetting.experimental_content_localization_supported_locales = "ja"
end
it "only processes posts from public categories" do
DiscourseAi::Translation::PostLocalizer.expects(:localize).with(public_post, "ja").once
DiscourseAi::Translation::PostLocalizer
.expects(:localize)
.with(private_post, any_parameters)
.never
DiscourseAi::Translation::PostLocalizer.expects(:localize).with(pm_post, any_parameters).never
job.execute({})
end
it "processes all posts when setting is disabled" do
SiteSetting.ai_translation_backfill_limit_to_public_content = false
DiscourseAi::Translation::PostLocalizer.expects(:localize).with(public_post, "ja").once
DiscourseAi::Translation::PostLocalizer.expects(:localize).with(pm_post, "ja").once
DiscourseAi::Translation::PostLocalizer.expects(:localize).with(private_post, "ja").once
job.execute({})
end
end
# Fixtures: one post created 10 days ago and one created 2 days ago, both
# with locale "es"; the max age is set to 5 days and only "ja" is targeted.
describe "with max age limit" do
fab!(:old_post) { Fabricate(:post, locale: "es", created_at: 10.days.ago) }
fab!(:new_post) { Fabricate(:post, locale: "es", created_at: 2.days.ago) }
before do
SiteSetting.ai_translation_backfill_max_age_days = 5
SiteSetting.experimental_content_localization_supported_locales = "ja"
end
it "only processes posts within the age limit" do
DiscourseAi::Translation::PostLocalizer.expects(:localize).with(new_post, "ja").once
DiscourseAi::Translation::PostLocalizer
.expects(:localize)
.with(old_post, any_parameters)
.never
job.execute({})
end
# A max age of 0 is exercised here as "no age limit".
it "processes all posts when setting is disabled" do
SiteSetting.ai_translation_backfill_max_age_days = 0
DiscourseAi::Translation::PostLocalizer.expects(:localize).with(new_post, "ja").once
DiscourseAi::Translation::PostLocalizer.expects(:localize).with(old_post, "ja").once
job.execute({})
end
end
end

View File

@ -0,0 +1,199 @@
# frozen_string_literal: true
# Specs for Jobs::LocalizeTopics. TopicLocalizer.localize is mocked or stubbed
# in every example, so these specs only verify which (topic, locale) pairs the
# job asks to localize, not the translations themselves.
describe Jobs::LocalizeTopics do
fab!(:topic)
subject(:job) { described_class.new }
let(:locales) { %w[en ja de] }
# Enable the plugin and the translation feature, select a fabricated fake LLM
# as the translation model, and advertise `locales` as supported.
before do
SiteSetting.discourse_ai_enabled = true
Fabricate(:fake_model).tap do |fake_llm|
SiteSetting.public_send("ai_translation_model=", "custom:#{fake_llm.id}")
end
SiteSetting.ai_translation_enabled = true
SiteSetting.experimental_content_localization_supported_locales = locales.join("|")
end
it "does nothing when translator is disabled" do
SiteSetting.discourse_ai_enabled = false
DiscourseAi::Translation::TopicLocalizer.expects(:localize).never
job.execute({})
end
it "does nothing when ai_translation_enabled is disabled" do
SiteSetting.ai_translation_enabled = false
DiscourseAi::Translation::TopicLocalizer.expects(:localize).never
job.execute({})
end
it "does nothing when no target languages are configured" do
SiteSetting.experimental_content_localization_supported_locales = ""
DiscourseAi::Translation::TopicLocalizer.expects(:localize).never
job.execute({})
end
it "does nothing when there are no topics to translate" do
Topic.destroy_all
DiscourseAi::Translation::TopicLocalizer.expects(:localize).never
job.execute({})
end
# NOTE(review): only en and ja localizations are fabricated although de is
# also a supported locale; presumably this passes because the fabricated
# topics have no locale set (see scenario 1 below) — confirm.
it "skips topics that already have localizations" do
Topic.all.each do |topic|
Fabricate(:topic_localization, topic:, locale: "en")
Fabricate(:topic_localization, topic:, locale: "ja")
end
DiscourseAi::Translation::TopicLocalizer.expects(:localize).never
job.execute({})
end
# "Bot" here means a topic owned by Discourse.system_user.
it "skips bot topics" do
topic.update!(user: Discourse.system_user)
DiscourseAi::Translation::TopicLocalizer.expects(:localize).with(topic, "en").never
DiscourseAi::Translation::TopicLocalizer.expects(:localize).with(topic, "ja").never
job.execute({})
end
# The en attempt raises; ja and de must still be attempted and nothing may
# propagate out of the job.
it "handles translation errors gracefully" do
topic.update(locale: "es")
DiscourseAi::Translation::TopicLocalizer
.expects(:localize)
.with(topic, "en")
.raises(StandardError.new("API error"))
DiscourseAi::Translation::TopicLocalizer.expects(:localize).with(topic, "ja").once
DiscourseAi::Translation::TopicLocalizer.expects(:localize).with(topic, "de").once
expect { job.execute({}) }.not_to raise_error
end
# Expects one VerboseLogger summary line per supported locale.
it "logs a summary after translation" do
topic.update(locale: "es")
DiscourseAi::Translation::TopicLocalizer.stubs(:localize)
DiscourseAi::Translation::VerboseLogger.expects(:log).with(
includes("Translated 1 topics to en"),
)
DiscourseAi::Translation::VerboseLogger.expects(:log).with(
includes("Translated 1 topics to ja"),
)
DiscourseAi::Translation::VerboseLogger.expects(:log).with(
includes("Translated 1 topics to de"),
)
job.execute({})
end
# Each scenario combines a topic locale with a set of existing localizations
# and pins exactly which target locales are requested.
context "for translation scenarios" do
# Relies on the outer fabricated `topic`, whose locale is not assigned in
# this example.
it "scenario 1: skips topic when locale is not set" do
DiscourseAi::Translation::TopicLocalizer.expects(:localize).never
job.execute({})
end
it "scenario 2: returns topic with locale 'es' if localizations for en/ja/de do not exist" do
topic = Fabricate(:topic, locale: "es")
DiscourseAi::Translation::TopicLocalizer.expects(:localize).with(topic, "en").once
DiscourseAi::Translation::TopicLocalizer.expects(:localize).with(topic, "ja").once
DiscourseAi::Translation::TopicLocalizer.expects(:localize).with(topic, "de").once
job.execute({})
end
it "scenario 3: returns topic with locale 'en' if ja/de localization does not exist" do
topic = Fabricate(:topic, locale: "en")
DiscourseAi::Translation::TopicLocalizer.expects(:localize).with(topic, "ja").once
DiscourseAi::Translation::TopicLocalizer.expects(:localize).with(topic, "de").once
DiscourseAi::Translation::TopicLocalizer.expects(:localize).with(topic, "en").never
job.execute({})
end
it "scenario 4: skips topic with locale 'en' if 'ja' localization already exists" do
topic = Fabricate(:topic, locale: "en")
Fabricate(:topic_localization, topic: topic, locale: "ja")
DiscourseAi::Translation::TopicLocalizer.expects(:localize).with(topic, "en").never
DiscourseAi::Translation::TopicLocalizer.expects(:localize).with(topic, "ja").never
DiscourseAi::Translation::TopicLocalizer.expects(:localize).with(topic, "de").once
job.execute({})
end
end
# Fixtures: a topic in a :private_category fabricated for the staff group and
# a default fabricated topic, both with locale "es". All three locales from
# the outer `before` remain targets.
describe "with public content limitation" do
fab!(:private_category) { Fabricate(:private_category, group: Group[:staff]) }
fab!(:private_topic) { Fabricate(:topic, category: private_category, locale: "es") }
fab!(:public_topic) { Fabricate(:topic, locale: "es") }
before { SiteSetting.ai_translation_backfill_limit_to_public_content = true }
it "only processes topics from public categories" do
DiscourseAi::Translation::TopicLocalizer.expects(:localize).with(public_topic, "en").once
DiscourseAi::Translation::TopicLocalizer.expects(:localize).with(public_topic, "ja").once
DiscourseAi::Translation::TopicLocalizer.expects(:localize).with(public_topic, "de").once
DiscourseAi::Translation::TopicLocalizer
.expects(:localize)
.with(private_topic, any_parameters)
.never
job.execute({})
end
it "processes all topics when setting is disabled" do
SiteSetting.ai_translation_backfill_limit_to_public_content = false
DiscourseAi::Translation::TopicLocalizer.expects(:localize).with(public_topic, "en").once
DiscourseAi::Translation::TopicLocalizer.expects(:localize).with(public_topic, "ja").once
DiscourseAi::Translation::TopicLocalizer.expects(:localize).with(public_topic, "de").once
DiscourseAi::Translation::TopicLocalizer.expects(:localize).with(private_topic, "en").once
DiscourseAi::Translation::TopicLocalizer.expects(:localize).with(private_topic, "ja").once
DiscourseAi::Translation::TopicLocalizer.expects(:localize).with(private_topic, "de").once
job.execute({})
end
end
# Fixtures: one topic created 10 days ago and one created 2 days ago, both
# with locale "es"; the max age is set to 5 days.
describe "with max age limit" do
fab!(:old_topic) { Fabricate(:topic, locale: "es", created_at: 10.days.ago) }
fab!(:new_topic) { Fabricate(:topic, locale: "es", created_at: 2.days.ago) }
before { SiteSetting.ai_translation_backfill_max_age_days = 5 }
it "only processes topics within the age limit" do
DiscourseAi::Translation::TopicLocalizer.expects(:localize).with(new_topic, "en").once
DiscourseAi::Translation::TopicLocalizer.expects(:localize).with(new_topic, "ja").once
DiscourseAi::Translation::TopicLocalizer.expects(:localize).with(new_topic, "de").once
DiscourseAi::Translation::TopicLocalizer
.expects(:localize)
.with(old_topic, any_parameters)
.never
job.execute({})
end
# A max age of 0 is exercised here as "no age limit".
it "processes all topics when setting is disabled" do
SiteSetting.ai_translation_backfill_max_age_days = 0
DiscourseAi::Translation::TopicLocalizer.expects(:localize).with(new_topic, "en").once
DiscourseAi::Translation::TopicLocalizer.expects(:localize).with(new_topic, "ja").once
DiscourseAi::Translation::TopicLocalizer.expects(:localize).with(new_topic, "de").once
DiscourseAi::Translation::TopicLocalizer.expects(:localize).with(old_topic, "en").once
DiscourseAi::Translation::TopicLocalizer.expects(:localize).with(old_topic, "ja").once
DiscourseAi::Translation::TopicLocalizer.expects(:localize).with(old_topic, "de").once
job.execute({})
end
end
end

View File

@ -0,0 +1,58 @@
# frozen_string_literal: true
# Specs for Jobs::PostLocalizationBackfill: checks under which site-setting
# combinations the job enqueues a :localize_posts job, and with which limit.
describe Jobs::PostLocalizationBackfill do
  # Baseline: a backfill rate of 100, "en" as the only supported locale, and a
  # fabricated fake LLM as the translation model. Each example sets the
  # enable flags it cares about.
  before do
    SiteSetting.ai_translation_backfill_rate = 100
    SiteSetting.experimental_content_localization_supported_locales = "en"
    Fabricate(:fake_model).tap do |fake_llm|
      SiteSetting.public_send("ai_translation_model=", "custom:#{fake_llm.id}")
    end
  end

  it "does not enqueue post translation when translator disabled" do
    SiteSetting.discourse_ai_enabled = false

    described_class.new.execute({})

    expect_not_enqueued_with(job: :localize_posts)
  end

  it "does not enqueue post translation when experimental translation disabled" do
    SiteSetting.discourse_ai_enabled = true
    SiteSetting.ai_translation_enabled = false

    described_class.new.execute({})

    expect_not_enqueued_with(job: :localize_posts)
  end

  it "does not enqueue post translation if backfill languages are not set" do
    SiteSetting.discourse_ai_enabled = true
    SiteSetting.ai_translation_enabled = true
    SiteSetting.experimental_content_localization_supported_locales = ""

    described_class.new.execute({})

    expect_not_enqueued_with(job: :localize_posts)
  end

  it "does not enqueue post translation if backfill rate is set to 0" do
    SiteSetting.discourse_ai_enabled = true
    SiteSetting.ai_translation_enabled = true
    SiteSetting.ai_translation_backfill_rate = 0

    described_class.new.execute({})

    expect_not_enqueued_with(job: :localize_posts)
  end

  # The configured backfill rate is expected to be passed on as the `limit`
  # argument of the enqueued job.
  it "enqueues post translation with correct limit" do
    SiteSetting.discourse_ai_enabled = true
    SiteSetting.ai_translation_enabled = true
    SiteSetting.ai_translation_backfill_rate = 10

    described_class.new.execute({})

    expect_job_enqueued(job: :localize_posts, args: { limit: 10 })
  end
end

View File

@ -0,0 +1,135 @@
# frozen_string_literal: true
# Specs for Jobs::PostsLocaleDetectionBackfill. PostLocaleDetector.detect_locale
# is mocked or stubbed in every example, so these specs only verify which
# posts the job submits for locale detection.
describe Jobs::PostsLocaleDetectionBackfill do
fab!(:post) { Fabricate(:post, locale: nil) }
subject(:job) { described_class.new }
# Enable the plugin and the translation feature, select a fabricated fake LLM
# as the translation model, and set the backfill rate to 100.
before do
SiteSetting.discourse_ai_enabled = true
Fabricate(:fake_model).tap do |fake_llm|
SiteSetting.public_send("ai_translation_model=", "custom:#{fake_llm.id}")
end
SiteSetting.ai_translation_enabled = true
SiteSetting.ai_translation_backfill_rate = 100
end
it "does nothing when translator is disabled" do
SiteSetting.discourse_ai_enabled = false
DiscourseAi::Translation::PostLocaleDetector.expects(:detect_locale).never
job.execute({})
end
it "does nothing when content translation is disabled" do
SiteSetting.ai_translation_enabled = false
DiscourseAi::Translation::PostLocaleDetector.expects(:detect_locale).never
job.execute({})
end
# Every post is given a locale, leaving nothing to detect.
it "does nothing when there are no posts to detect" do
Post.update_all(locale: "en")
DiscourseAi::Translation::PostLocaleDetector.expects(:detect_locale).never
job.execute({})
end
it "detects locale for posts with nil locale" do
DiscourseAi::Translation::PostLocaleDetector.expects(:detect_locale).with(post).once
job.execute({})
end
# With the backfill rate lowered to 1, only the post with the newest
# updated_at (post_2, two days ago) is expected to be detected.
it "detects most recently updated posts first" do
post_2 = Fabricate(:post, locale: nil)
post_3 = Fabricate(:post, locale: nil)
post.update!(updated_at: 3.days.ago)
post_2.update!(updated_at: 2.day.ago)
post_3.update!(updated_at: 4.day.ago)
SiteSetting.ai_translation_backfill_rate = 1
DiscourseAi::Translation::PostLocaleDetector.expects(:detect_locale).with(post_2).once
DiscourseAi::Translation::PostLocaleDetector.expects(:detect_locale).with(post).never
DiscourseAi::Translation::PostLocaleDetector.expects(:detect_locale).with(post_3).never
job.execute({})
end
# "Bot" here means a post authored by Discourse.system_user.
it "skips bot posts" do
post.update!(user: Discourse.system_user)
DiscourseAi::Translation::PostLocaleDetector.expects(:detect_locale).with(post).never
job.execute({})
end
# The detector raises for the only candidate post; the job must not
# propagate the error.
it "handles detection errors gracefully" do
DiscourseAi::Translation::PostLocaleDetector
.expects(:detect_locale)
.with(post)
.raises(StandardError.new("jiboomz"))
.once
expect { job.execute({}) }.not_to raise_error
end
it "logs a summary after running" do
DiscourseAi::Translation::PostLocaleDetector.stubs(:detect_locale)
DiscourseAi::Translation::VerboseLogger.expects(:log).with(includes("Detected 1 post locales"))
job.execute({})
end
# Fixture: an extra post without locale in a topic of a :private_category
# fabricated for the staff group; the outer `post` serves as the public one.
describe "with public content limitation" do
fab!(:private_category) { Fabricate(:private_category, group: Group[:staff]) }
fab!(:private_topic) { Fabricate(:topic, category: private_category) }
fab!(:private_post) { Fabricate(:post, topic: private_topic, locale: nil) }
before { SiteSetting.ai_translation_backfill_limit_to_public_content = true }
it "only processes posts from public categories" do
DiscourseAi::Translation::PostLocaleDetector.expects(:detect_locale).with(post).once
DiscourseAi::Translation::PostLocaleDetector.expects(:detect_locale).with(private_post).never
job.execute({})
end
it "processes all posts when setting is disabled" do
SiteSetting.ai_translation_backfill_limit_to_public_content = false
DiscourseAi::Translation::PostLocaleDetector.expects(:detect_locale).with(post).once
DiscourseAi::Translation::PostLocaleDetector.expects(:detect_locale).with(private_post).once
job.execute({})
end
end
# Fixtures: one post created 10 days ago and one created 2 days ago, both
# without locale; the max age is set to 5 days.
describe "with max age limit" do
fab!(:old_post) { Fabricate(:post, locale: nil, created_at: 10.days.ago) }
fab!(:new_post) { Fabricate(:post, locale: nil, created_at: 2.days.ago) }
before { SiteSetting.ai_translation_backfill_max_age_days = 5 }
it "only processes posts within the age limit" do
# other posts
# (catch-all declared first so the specific expectations below take
# precedence when they match)
DiscourseAi::Translation::PostLocaleDetector.expects(:detect_locale).at_least_once
DiscourseAi::Translation::PostLocaleDetector.expects(:detect_locale).with(new_post).once
DiscourseAi::Translation::PostLocaleDetector.expects(:detect_locale).with(old_post).never
job.execute({})
end
# A max age of 0 is exercised here as "no age limit".
it "processes all posts when setting is disabled" do
SiteSetting.ai_translation_backfill_max_age_days = 0
# other posts
DiscourseAi::Translation::PostLocaleDetector.expects(:detect_locale).at_least_once
DiscourseAi::Translation::PostLocaleDetector.expects(:detect_locale).with(new_post).once
DiscourseAi::Translation::PostLocaleDetector.expects(:detect_locale).with(old_post).once
job.execute({})
end
end
end

View File

@ -0,0 +1,140 @@
# frozen_string_literal: true
# Specs for Jobs::TopicsLocaleDetectionBackfill.
# TopicLocaleDetector.detect_locale is mocked or stubbed in every example, so
# these specs only verify which topics the job submits for locale detection.
describe Jobs::TopicsLocaleDetectionBackfill do
fab!(:topic) { Fabricate(:topic, locale: nil) }
subject(:job) { described_class.new }
# Enable the plugin and the translation feature, select a fabricated fake LLM
# as the translation model, and set the backfill rate to 100.
before do
SiteSetting.discourse_ai_enabled = true
Fabricate(:fake_model).tap do |fake_llm|
SiteSetting.public_send("ai_translation_model=", "custom:#{fake_llm.id}")
end
SiteSetting.ai_translation_enabled = true
SiteSetting.ai_translation_backfill_rate = 100
end
it "does nothing when translator is disabled" do
SiteSetting.discourse_ai_enabled = false
DiscourseAi::Translation::TopicLocaleDetector.expects(:detect_locale).never
job.execute({})
end
it "does nothing when content translation is disabled" do
SiteSetting.ai_translation_enabled = false
DiscourseAi::Translation::TopicLocaleDetector.expects(:detect_locale).never
job.execute({})
end
# Every topic is given a locale, leaving nothing to detect.
it "does nothing when there are no topics to detect" do
Topic.update_all(locale: "en")
DiscourseAi::Translation::TopicLocaleDetector.expects(:detect_locale).never
job.execute({})
end
it "detects locale for topics with nil locale" do
DiscourseAi::Translation::TopicLocaleDetector.expects(:detect_locale).with(topic).once
job.execute({})
end
# With the backfill rate lowered to 1, only the topic with the newest
# updated_at (topic_2, two days ago) is expected to be detected.
it "detects most recently updated topics first" do
topic_2 = Fabricate(:topic, locale: nil)
topic_3 = Fabricate(:topic, locale: nil)
topic.update!(updated_at: 3.days.ago)
topic_2.update!(updated_at: 2.day.ago)
topic_3.update!(updated_at: 4.day.ago)
SiteSetting.ai_translation_backfill_rate = 1
DiscourseAi::Translation::TopicLocaleDetector.expects(:detect_locale).with(topic_2).once
DiscourseAi::Translation::TopicLocaleDetector.expects(:detect_locale).with(topic).never
DiscourseAi::Translation::TopicLocaleDetector.expects(:detect_locale).with(topic_3).never
job.execute({})
end
# "Bot" here means a topic owned by Discourse.system_user.
it "skips bot topics" do
topic.update!(user: Discourse.system_user)
DiscourseAi::Translation::TopicLocaleDetector.expects(:detect_locale).with(topic).never
job.execute({})
end
# The detector raises for the only candidate topic; the job must not
# propagate the error.
it "handles detection errors gracefully" do
DiscourseAi::Translation::TopicLocaleDetector
.expects(:detect_locale)
.with(topic)
.raises(StandardError.new("jiboomz"))
.once
expect { job.execute({}) }.not_to raise_error
end
it "logs a summary after running" do
DiscourseAi::Translation::TopicLocaleDetector.stubs(:detect_locale)
DiscourseAi::Translation::VerboseLogger.expects(:log).with(includes("Detected 1 topic locales"))
job.execute({})
end
# Fixtures: a default fabricated topic and a topic in a :private_category
# fabricated for the staff group, both without locale.
describe "with public content limitation" do
fab!(:private_category) { Fabricate(:private_category, group: Group[:staff]) }
fab!(:public_topic) { Fabricate(:topic, locale: nil) }
fab!(:private_topic) { Fabricate(:topic, category: private_category, locale: nil) }
before do
# Catch-all for other topics (e.g. the outer `topic`); declared before the
# per-example expectations so those take precedence when they match.
DiscourseAi::Translation::TopicLocaleDetector.expects(:detect_locale).at_least_once
SiteSetting.ai_translation_backfill_limit_to_public_content = true
end
it "only processes topics from public categories" do
DiscourseAi::Translation::TopicLocaleDetector.expects(:detect_locale).with(public_topic).once
DiscourseAi::Translation::TopicLocaleDetector
.expects(:detect_locale)
.with(private_topic)
.never
job.execute({})
end
it "processes all topics when setting is disabled" do
SiteSetting.ai_translation_backfill_limit_to_public_content = false
DiscourseAi::Translation::TopicLocaleDetector.expects(:detect_locale).with(public_topic).once
DiscourseAi::Translation::TopicLocaleDetector.expects(:detect_locale).with(private_topic).once
job.execute({})
end
end
# Fixtures: one topic created 10 days ago and one created 2 days ago, both
# without locale; the max age is set to 5 days.
describe "with max age limit" do
fab!(:old_topic) { Fabricate(:topic, locale: nil, created_at: 10.days.ago) }
fab!(:new_topic) { Fabricate(:topic, locale: nil, created_at: 2.days.ago) }
before do
# Catch-all for other topics (e.g. the outer `topic`); declared before the
# per-example expectations so those take precedence when they match.
DiscourseAi::Translation::TopicLocaleDetector.expects(:detect_locale).at_least_once
SiteSetting.ai_translation_backfill_max_age_days = 5
end
it "only processes topics within the age limit" do
DiscourseAi::Translation::TopicLocaleDetector.expects(:detect_locale).with(new_topic).once
DiscourseAi::Translation::TopicLocaleDetector.expects(:detect_locale).with(old_topic).never
job.execute({})
end
# A max age of 0 is exercised here as "no age limit".
it "processes all topics when setting is disabled" do
SiteSetting.ai_translation_backfill_max_age_days = 0
DiscourseAi::Translation::TopicLocaleDetector.expects(:detect_locale).with(new_topic).once
DiscourseAi::Translation::TopicLocaleDetector.expects(:detect_locale).with(old_topic).once
job.execute({})
end
end
end

View File

@ -0,0 +1,65 @@
# frozen_string_literal: true
require "rails_helper"
# Specs for the #translate flow, exercised through PostRawTranslator
# instances: prompt construction, the call made to the LLM, and the value
# handed back to the caller.
describe DiscourseAi::Translation::BaseTranslator do
  before { SiteSetting.ai_translation_model = "custom:#{Fabricate(:fake_model).id}" }

  describe ".translate" do
    let(:text_to_translate) { "cats are great" }
    let(:target_language) { "de" }
    let(:llm_response) { "hur dur hur dur!" }

    it "creates the correct prompt" do
      post_translator =
        DiscourseAi::Translation::PostRawTranslator.new(text_to_translate, target_language)

      # `expect` (not `allow`) so the example fails when no prompt is built at
      # all; `at_least(:once)` keeps it agnostic about how often it is built.
      expect(DiscourseAi::Completions::Prompt).to receive(:new)
        .with(
          DiscourseAi::Translation::PostRawTranslator::PROMPT_TEMPLATE,
          messages: [{ type: :user, content: post_translator.formatted_content, id: "user" }],
        )
        .at_least(:once)
        .and_call_original

      DiscourseAi::Completions::Llm.with_prepared_responses([llm_response]) do
        post_translator.translate
      end
    end

    it "sends the translation prompt to the selected translation model" do
      mock_prompt = instance_double(DiscourseAi::Completions::Prompt)
      mock_llm = instance_double(DiscourseAi::Completions::Llm)
      post_translator =
        DiscourseAi::Translation::PostRawTranslator.new(text_to_translate, target_language)

      structured_output =
        DiscourseAi::Completions::StructuredOutput.new({ translation: { type: "string" } })
      structured_output << { translation: llm_response }.to_json

      allow(DiscourseAi::Completions::Prompt).to receive(:new).and_return(mock_prompt)
      allow(DiscourseAi::Completions::Llm).to receive(:proxy).with(
        SiteSetting.ai_translation_model,
      ).and_return(mock_llm)

      # The call under test: asserted, not merely stubbed.
      expect(mock_llm).to receive(:generate).with(
        mock_prompt,
        user: Discourse.system_user,
        feature_name: "translation",
        response_format: post_translator.response_format,
      ).and_return(structured_output)

      post_translator.translate
    end

    it "returns the translation from the llm's response" do
      DiscourseAi::Completions::Llm.with_prepared_responses([llm_response]) do
        expect(
          DiscourseAi::Translation::PostRawTranslator.new(
            text_to_translate,
            target_language,
          ).translate,
        ).to eq "hur dur hur dur!"
      end
    end
  end
end

View File

@ -0,0 +1,95 @@
# frozen_string_literal: true
# Specs for CategoryLocalizer.localize. Both translator classes are replaced
# with doubles so no LLM call is made; the examples check which translator
# receives which category field and what ends up on the returned record.
describe DiscourseAi::Translation::CategoryLocalizer do
  subject(:localizer) { described_class }

  before { SiteSetting.ai_translation_model = "custom:#{Fabricate(:fake_model).id}" }

  # Stubs PostRawTranslator.new(opts[:value], opts[:locale]) to return a double
  # whose #translate yields opts[:translated].
  def post_raw_translator_stub(opts)
    mock = instance_double(DiscourseAi::Translation::PostRawTranslator)
    allow(DiscourseAi::Translation::PostRawTranslator).to receive(:new).with(
      opts[:value],
      opts[:locale],
    ).and_return(mock)
    allow(mock).to receive(:translate).and_return(opts[:translated])
  end

  # Same as above, for ShortTextTranslator.
  def short_text_translator_stub(opts)
    mock = instance_double(DiscourseAi::Translation::ShortTextTranslator)
    allow(DiscourseAi::Translation::ShortTextTranslator).to receive(:new).with(
      opts[:value],
      opts[:locale],
    ).and_return(mock)
    allow(mock).to receive(:translate).and_return(opts[:translated])
  end

  fab!(:category) do
    Fabricate(:category, name: "Test Category", description: "This is a test category")
  end

  describe ".localize" do
    let(:target_locale) { :fr }

    it "translates the category name and description" do
      translated_cat_desc = "C'est une catégorie de test"
      translated_cat_name = "Catégorie de Test"
      short_text_translator_stub(
        { value: category.name, locale: target_locale, translated: translated_cat_name },
      )
      post_raw_translator_stub(
        { value: category.description, locale: target_locale, translated: translated_cat_desc },
      )

      res = localizer.localize(category, target_locale)

      expect(res.name).to eq(translated_cat_name)
      expect(res.description).to eq(translated_cat_desc)
    end

    it "handles locale format standardization" do
      translated_cat_desc = "C'est une catégorie de test"
      translated_cat_name = "Catégorie de Test"
      # The stubs only match the Symbol :fr, while the call below passes "fr".
      short_text_translator_stub(
        { value: category.name, locale: :fr, translated: translated_cat_name },
      )
      post_raw_translator_stub(
        { value: category.description, locale: :fr, translated: translated_cat_desc },
      )

      res = localizer.localize(category, "fr")

      expect(res.name).to eq(translated_cat_name)
      expect(res.description).to eq(translated_cat_desc)
    end

    it "returns nil if category is blank" do
      expect(localizer.localize(nil)).to be_nil
    end

    it "returns nil if target locale is blank" do
      expect(localizer.localize(category, nil)).to be_nil
    end

    it "uses I18n.locale as default when no target locale is provided" do
      translated_cat_desc = "C'est une catégorie de test"
      translated_cat_name = "Esta es una categoría de prueba"
      short_text_translator_stub(
        { value: category.name, locale: :es, translated: translated_cat_name },
      )
      post_raw_translator_stub(
        { value: category.description, locale: :es, translated: translated_cat_desc },
      )

      # Scope the locale switch to this block so the global I18n.locale is
      # restored even when an expectation fails.
      I18n.with_locale(:es) do
        res = localizer.localize(category)

        expect(res.name).to eq(translated_cat_name)
        expect(res.description).to eq(translated_cat_desc)
        expect(res.locale).to eq("es")
      end
    end
  end
end

View File

@ -0,0 +1,97 @@
# frozen_string_literal: true
# Specs for ContentSplitter.split. Most examples shrink CHUNK_SIZE so that
# short fixtures are forced to split, then check where the cuts land.
describe DiscourseAi::Translation::ContentSplitter do
  # Overrides CHUNK_SIZE for the current example only. stub_const restores the
  # real constant afterwards, so no hard-coded "original" value is needed and
  # no "already initialized constant" warning is emitted.
  def set_limit(value)
    stub_const("DiscourseAi::Translation::ContentSplitter::CHUNK_SIZE", value)
  end

  it "returns an array with an empty string for empty input" do
    expect(described_class.split("")).to eq([""])
  end

  it "handles content with only spaces" do
    expect(described_class.split(" ")).to eq([" "])
    # Second case uses a run of spaces so it is not a duplicate of the first.
    expect(described_class.split(" " * 2)).to eq([" " * 2])
  end

  it "handles nil input" do
    expect(described_class.split(nil)).to eq([])
  end

  it "doesn't split content under limit" do
    text = "hello world"
    expect(described_class.split(text)).to eq([text])
  end

  it "preserves HTML tags" do
    set_limit(10)
    text = "<p>hello</p><p>meow</p>"
    expect(described_class.split(text)).to eq(%w[<p>hello</p> <p>meow</p>])

    set_limit(35)
    text = "<div>hello</div> <div>jurassic</div> <p>world</p>"
    expect(described_class.split(text)).to eq(
      ["<div>hello</div> <div>jurassic</div>", " <p>world</p>"],
    )
  end

  it "preserves BBCode tags" do
    set_limit(20)
    text = "[quote]hello[/quote][details]world[/details]"
    expect(described_class.split(text)).to eq(["[quote]hello[/quote]", "[details]world[/details]"])
  end

  it "doesn't split in middle of words" do
    set_limit(10)
    text = "my kitty best in the world"
    expect(described_class.split(text)).to eq(["my kitty ", "best in ", "the world"])
  end

  it "handles nested tags properly" do
    set_limit(25)
    text = "<div>hello<p>cat</p>world</div><p>meow</p>"
    expect(described_class.split(text)).to eq(%w[<div>hello<p>cat</p>world</div> <p>meow</p>])
  end

  it "handles mixed HTML and BBCode" do
    set_limit(15)
    text = "<div>hello</div>[quote]world[/quote]<p>beautiful</p>"
    expect(described_class.split(text)).to eq(
      ["<div>hello</div>", "[quote]world[/quote]", "<p>beautiful</p>"],
    )
  end

  it "preserves newlines in sensible places" do
    set_limit(10)
    text = "hello\nbeautiful\nworld\n"
    expect(described_class.split(text)).to eq(["hello\n", "beautiful\n", "world\n"])
  end

  it "handles email content properly" do
    set_limit(20)
    text = "From: test@test.com\nTo: other@test.com\nSubject: Hello\n\nContent here"
    expect(described_class.split(text)).to eq(
      ["From: test@test.com\n", "To: other@test.com\n", "Subject: Hello\n\n", "Content here"],
    )
  end

  it "keeps code blocks intact" do
    set_limit(30)
    text = "Text\n```\ncode block\nhere\n```\nmore text"
    expect(described_class.split(text)).to eq(["Text\n```\ncode block\nhere\n```\n", "more text"])
  end

  context "with multiple details tags" do
    it "splits correctly between details tags" do
      set_limit(30)
      text = "<details>first content</details><details>second content</details>"
      expect(described_class.split(text)).to eq(
        ["<details>first content</details>", "<details>second content</details>"],
      )
    end
  end
end

View File

@ -0,0 +1,82 @@
# frozen_string_literal: true
# Specs for the translation event hooks: post processing, topic creation and
# first-post edits should enqueue the detect/translate jobs only while
# ai_translation_enabled is on.
describe DiscourseAi::Translation::EntryPoint do
  before do
    SiteSetting.discourse_ai_enabled = true
    SiteSetting.ai_translation_model = "custom:#{Fabricate(:fake_model).id}"
    SiteSetting.ai_translation_enabled = true
  end

  describe "upon post process cooked" do
    it "enqueues detect post locale and translate post job" do
      post = Fabricate(:post)
      CookedPostProcessor.new(post).post_process

      expect_job_enqueued(job: :detect_translate_post, args: { post_id: post.id })
    end

    it "does not enqueue if setting disabled" do
      SiteSetting.ai_translation_enabled = false
      post = Fabricate(:post)
      CookedPostProcessor.new(post).post_process

      expect(job_enqueued?(job: :detect_translate_post, args: { post_id: post.id })).to eq false
    end
  end

  describe "upon topic created" do
    it "enqueues detect topic locale and translate topic job" do
      topic =
        PostCreator.create!(
          Fabricate(:admin),
          raw: "post",
          title: "topic",
          skip_validations: true,
        ).topic

      expect_job_enqueued(job: :detect_translate_topic, args: { topic_id: topic.id })
    end

    it "does not enqueue if setting disabled" do
      SiteSetting.ai_translation_enabled = false
      topic =
        PostCreator.create!(
          Fabricate(:admin),
          raw: "post",
          title: "topic",
          skip_validations: true,
        ).topic

      expect(job_enqueued?(job: :detect_translate_topic, args: { topic_id: topic.id })).to eq false
    end
  end

  describe "upon first post (topic) edited" do
    fab!(:post) { Fabricate(:post, post_number: 1) }

    # Changes the topic title through PostRevisor and runs post-processing,
    # i.e. the edit that both examples below need to have happened.
    def revise_topic_title(target_post)
      revisor = PostRevisor.new(target_post, target_post.topic)
      revisor.revise!(
        target_post.user,
        { title: "A whole new hole" },
        { validate_post: false, bypass_bump: false },
      )
      revisor.post_process_post
    end

    it "enqueues detect topic locale and translate topic job" do
      revise_topic_title(post)

      expect_job_enqueued(job: :detect_translate_topic, args: { topic_id: post.topic_id })
    end

    it "does not enqueue if setting disabled" do
      SiteSetting.ai_translation_enabled = false

      # Perform the edit; without it the assertion would hold trivially.
      revise_topic_title(post)

      expect(
        job_enqueued?(job: :detect_translate_topic, args: { topic_id: post.topic_id }),
      ).to eq false
    end
  end
end

View File

@ -0,0 +1,53 @@
# frozen_string_literal: true
# Specs for LanguageDetector#detect: prompt construction, the call made to the
# LLM, and the value handed back to the caller.
describe DiscourseAi::Translation::LanguageDetector do
  before { SiteSetting.ai_translation_model = "custom:#{Fabricate(:fake_model).id}" }

  describe ".detect" do
    let(:locale_detector) { described_class.new("meow") }
    let(:llm_response) { "hur dur hur dur!" }

    it "creates the correct prompt" do
      # `expect` (not `allow`) so the example fails when no prompt is built at
      # all; `at_least(:once)` keeps it agnostic about how often it is built.
      expect(DiscourseAi::Completions::Prompt).to receive(:new)
        .with(
          DiscourseAi::Translation::LanguageDetector::PROMPT_TEXT,
          messages: [{ type: :user, content: "meow", id: "user" }],
        )
        .at_least(:once)
        .and_call_original

      DiscourseAi::Completions::Llm.with_prepared_responses([llm_response]) do
        locale_detector.detect
      end
    end

    it "sends the language detection prompt to the selected translation model" do
      mock_prompt = instance_double(DiscourseAi::Completions::Prompt)
      mock_llm = instance_double(DiscourseAi::Completions::Llm)

      structured_output =
        DiscourseAi::Completions::StructuredOutput.new({ locale: { type: "string" } })
      structured_output << { locale: llm_response }.to_json

      allow(DiscourseAi::Completions::Prompt).to receive(:new).and_return(mock_prompt)
      allow(DiscourseAi::Completions::Llm).to receive(:proxy).with(
        SiteSetting.ai_translation_model,
      ).and_return(mock_llm)

      # The call under test: asserted, not merely stubbed.
      expect(mock_llm).to receive(:generate).with(
        mock_prompt,
        user: Discourse.system_user,
        feature_name: "translation",
        response_format: locale_detector.response_format,
      ).and_return(structured_output)

      locale_detector.detect
    end

    it "returns the language from the llm's response in the language tag" do
      DiscourseAi::Completions::Llm.with_prepared_responses([llm_response]) do
        # Previously the result was discarded, so nothing was verified.
        expect(locale_detector.detect).to eq(llm_response)
      end
    end
  end
end

View File

@ -0,0 +1,14 @@
# frozen_string_literal: true
# Specs for LocaleNormalizer.normalize_to_i18n using a handful of sample
# inputs and the output expected for each.
describe DiscourseAi::Translation::LocaleNormalizer do
  it "matches input locales to i18n locales" do
    samples = { "en-GB" => "en_GB", "en" => "en", "zh" => "zh_CN", "tr" => "tr_TR" }

    samples.each do |input, expected|
      expect(described_class.normalize_to_i18n(input)).to eq(expected)
    end
  end

  it "converts dashes to underscores" do
    normalized = described_class.normalize_to_i18n("a-b")

    expect(normalized).to eq("a_b")
  end
end

View File

@ -0,0 +1,35 @@
# frozen_string_literal: true
# Specs for PostLocaleDetector.detect_locale with LanguageDetector replaced by
# a double, so only the handling of the detected value is under test.
describe DiscourseAi::Translation::PostLocaleDetector do
  describe ".detect_locale" do
    fab!(:post) { Fabricate(:post, raw: "Hello world", locale: nil) }

    # Stubs LanguageDetector.new(opts[:text]) to return a double whose #detect
    # yields opts[:locale].
    def language_detector_stub(opts)
      detector = instance_double(DiscourseAi::Translation::LanguageDetector)
      allow(DiscourseAi::Translation::LanguageDetector).to receive(:new).with(
        opts[:text],
      ).and_return(detector)
      allow(detector).to receive(:detect).and_return(opts[:locale])
    end

    it "returns nil if post is blank" do
      result = described_class.detect_locale(nil)

      expect(result).to eq(nil)
    end

    it "updates the post locale with the detected locale" do
      language_detector_stub({ text: post.raw, locale: "zh_CN" })

      expect { described_class.detect_locale(post) }.to change { post.reload.locale }.from(nil).to(
        "zh_CN",
      )
    end

    it "bypasses validations when updating locale" do
      # Written with update_column so the one-character raw skips validations.
      post.update_column(:raw, "A")
      language_detector_stub({ text: post.raw, locale: "zh_CN" })

      described_class.detect_locale(post)

      expect(post.reload.locale).to eq("zh_CN")
    end
  end
end

View File

@ -0,0 +1,75 @@
# frozen_string_literal: true
# Specs for PostLocalizer.localize with PostRawTranslator replaced by a double:
# guard clauses, the arguments given to the translator, and the
# PostLocalization record that is created or updated.
describe DiscourseAi::Translation::PostLocalizer do
  describe ".localize" do
    fab!(:post) { Fabricate(:post, raw: "Hello world", version: 1) }

    let(:translated_raw) { "こんにちは世界" }
    let(:cooked) { "<p>こんにちは世界</p>" }
    let(:target_locale) { "ja" }

    # Stubs PostRawTranslator.new(opts[:value], opts[:locale]) to return a
    # double whose #translate yields opts[:translated].
    def post_raw_translator_stub(opts)
      mock = instance_double(DiscourseAi::Translation::PostRawTranslator)
      allow(DiscourseAi::Translation::PostRawTranslator).to receive(:new).with(
        opts[:value],
        opts[:locale],
      ).and_return(mock)
      allow(mock).to receive(:translate).and_return(opts[:translated])
    end

    it "returns nil if post is blank" do
      expect(described_class.localize(nil, "ja")).to eq(nil)
    end

    it "returns nil if target_locale is blank" do
      expect(described_class.localize(post, nil)).to eq(nil)
      expect(described_class.localize(post, "")).to eq(nil)
    end

    it "returns nil if target_locale is same as post locale" do
      post.locale = "en"

      expect(described_class.localize(post, "en")).to eq(nil)
    end

    it "translates with post and locale" do
      post_raw_translator_stub({ value: post.raw, locale: :ja, translated: translated_raw })

      described_class.localize(post, "ja")

      # Without this the example passes even if the translator is never built.
      expect(DiscourseAi::Translation::PostRawTranslator).to have_received(:new).with(
        post.raw,
        :ja,
      )
    end

    it "normalizes dashes to underscores and symbol type for locale" do
      post_raw_translator_stub({ value: post.raw, locale: :zh_CN, translated: "你好,世界" })

      described_class.localize(post, "zh-CN")

      expect(DiscourseAi::Translation::PostRawTranslator).to have_received(:new).with(
        post.raw,
        :zh_CN,
      )
    end

    it "finds or creates a PostLocalization and sets its fields" do
      post_raw_translator_stub({ value: post.raw, locale: :ja, translated: translated_raw })

      expect {
        res = described_class.localize(post, target_locale)
        expect(res).to be_a(PostLocalization)
        expect(res).to have_attributes(
          post_id: post.id,
          locale: target_locale,
          raw: translated_raw,
          cooked: cooked,
          post_version: post.version,
          localizer_user_id: Discourse.system_user.id,
        )
      }.to change { PostLocalization.count }.by(1)
    end

    it "updates an existing PostLocalization if present" do
      post_raw_translator_stub({ value: post.raw, locale: :ja, translated: translated_raw })
      localization =
        Fabricate(:post_localization, post: post, locale: "ja", raw: "old", cooked: "old_cooked")

      expect {
        out = described_class.localize(post, "ja")
        expect(out.id).to eq(localization.id)
        expect(out.raw).to eq(translated_raw)
        expect(out.cooked).to eq(cooked)
      }.to_not change { PostLocalization.count }
    end
  end
end

View File

@ -0,0 +1,38 @@
# frozen_string_literal: true
# Specs for TopicLocaleDetector.detect_locale with LanguageDetector replaced by
# a double, so only the text passed in and the handling of the detected value
# are under test.
describe DiscourseAi::Translation::TopicLocaleDetector do
  describe ".detect_locale" do
    fab!(:topic) { Fabricate(:topic, title: "this is a cat topic", locale: nil) }
    fab!(:post) { Fabricate(:post, raw: "and kittens", topic:) }

    # Stubs LanguageDetector.new(opts[:text]) to return a double whose #detect
    # yields opts[:locale].
    def language_detector_stub(opts)
      mock = instance_double(DiscourseAi::Translation::LanguageDetector)
      allow(DiscourseAi::Translation::LanguageDetector).to receive(:new).with(
        opts[:text],
      ).and_return(mock)
      allow(mock).to receive(:detect).and_return(opts[:locale])
    end

    # The example passes a nil topic, so the description names the topic
    # rather than its title.
    it "returns nil if topic is blank" do
      expect(described_class.detect_locale(nil)).to eq(nil)
    end

    it "updates the topic locale with the detected locale" do
      language_detector_stub({ text: "This is a cat topic and kittens", locale: "zh_CN" })

      expect { described_class.detect_locale(topic) }.to change { topic.reload.locale }.from(
        nil,
      ).to("zh_CN")
    end

    it "bypasses validations when updating locale" do
      # Title is written with update_column and the length limits are then set
      # so that "A" would not pass title validation.
      topic.update_column(:title, "A")
      SiteSetting.min_topic_title_length = 15
      SiteSetting.max_topic_title_length = 16
      language_detector_stub({ text: "A and kittens", locale: "zh_CN" })

      described_class.detect_locale(topic)

      expect(topic.reload.locale).to eq("zh_CN")
    end
  end
end

View File

@ -0,0 +1,117 @@
# frozen_string_literal: true
# Specs for TopicLocalizer.localize with both translator classes replaced by
# doubles: guard clauses, the arguments given to each translator, and the
# TopicLocalization record that is created or updated.
describe DiscourseAi::Translation::TopicLocalizer do
  describe ".localize" do
    fab!(:topic) do
      Fabricate(
        :topic,
        title: "this is a cat topic :)",
        excerpt: "cats are great. how many do you have?",
      )
    end

    let(:translated_title) { "これは猫の話題です :)" }
    let(:translated_excerpt) { "猫は素晴らしいですね。何匹飼っていますか?" }
    let(:fancy_title) { "これは猫の話題です :slight_smile:" }
    let(:target_locale) { "ja" }

    # Stubs TopicTitleTranslator.new(opts[:value], opts[:locale]) to return a
    # double whose #translate yields opts[:translated].
    def topic_title_translator_stub(opts)
      mock = instance_double(DiscourseAi::Translation::TopicTitleTranslator)
      allow(DiscourseAi::Translation::TopicTitleTranslator).to receive(:new).with(
        opts[:value],
        opts[:locale],
      ).and_return(mock)
      allow(mock).to receive(:translate).and_return(opts[:translated])
    end

    # Same as above, for ShortTextTranslator.
    def short_text_translator_stub(opts)
      mock = instance_double(DiscourseAi::Translation::ShortTextTranslator)
      allow(DiscourseAi::Translation::ShortTextTranslator).to receive(:new).with(
        opts[:value],
        opts[:locale],
      ).and_return(mock)
      allow(mock).to receive(:translate).and_return(opts[:translated])
    end

    it "returns nil if topic is blank" do
      expect(described_class.localize(nil, "ja")).to eq(nil)
    end

    it "returns nil if target_locale is blank" do
      expect(described_class.localize(topic, nil)).to eq(nil)
      expect(described_class.localize(topic, "")).to eq(nil)
    end

    it "returns nil if target_locale is same as topic locale" do
      topic.locale = "en"

      expect(described_class.localize(topic, "en")).to eq(nil)
    end

    it "translates with topic and locale" do
      topic_title_translator_stub({ value: topic.title, locale: :ja, translated: translated_title })
      short_text_translator_stub(
        { value: topic.excerpt, locale: :ja, translated: translated_excerpt },
      )

      described_class.localize(topic, "ja")

      # Without these the example passes even if no translator is ever built.
      expect(DiscourseAi::Translation::TopicTitleTranslator).to have_received(:new).with(
        topic.title,
        :ja,
      )
      expect(DiscourseAi::Translation::ShortTextTranslator).to have_received(:new).with(
        topic.excerpt,
        :ja,
      )
    end

    it "normalizes dashes to underscores and symbol type for locale" do
      topic_title_translator_stub({ value: topic.title, locale: :zh_CN, translated: "这是一个猫主题 :)" })
      short_text_translator_stub({ value: topic.excerpt, locale: :zh_CN, translated: "这是一个猫主题 :)" })

      described_class.localize(topic, "zh-CN")

      expect(DiscourseAi::Translation::TopicTitleTranslator).to have_received(:new).with(
        topic.title,
        :zh_CN,
      )
      expect(DiscourseAi::Translation::ShortTextTranslator).to have_received(:new).with(
        topic.excerpt,
        :zh_CN,
      )
    end

    it "finds or creates a TopicLocalization and sets its fields" do
      topic_title_translator_stub({ value: topic.title, locale: :ja, translated: translated_title })
      short_text_translator_stub(
        { value: topic.excerpt, locale: :ja, translated: translated_excerpt },
      )

      expect {
        res = described_class.localize(topic, target_locale)
        expect(res).to be_a(TopicLocalization)
        expect(res).to have_attributes(
          topic_id: topic.id,
          locale: target_locale,
          title: translated_title,
          excerpt: translated_excerpt,
          fancy_title: fancy_title,
          localizer_user_id: Discourse.system_user.id,
        )
      }.to change { TopicLocalization.count }.by(1)
    end

    it "updates an existing TopicLocalization if present" do
      topic_title_translator_stub({ value: topic.title, locale: :ja, translated: translated_title })
      short_text_translator_stub(
        { value: topic.excerpt, locale: :ja, translated: translated_excerpt },
      )
      localization =
        Fabricate(
          :topic_localization,
          topic:,
          locale: "ja",
          title: "old title",
          excerpt: "old excerpt",
          fancy_title: "old_fancy_title",
        )

      expect {
        expect(described_class.localize(topic, "ja")).to have_attributes(
          id: localization.id,
          title: translated_title,
          fancy_title: fancy_title,
          excerpt: translated_excerpt,
        )
        expect(localization.reload).to have_attributes(
          title: translated_title,
          fancy_title: fancy_title,
          excerpt: translated_excerpt,
        )
      }.to_not change { TopicLocalization.count }
    end
  end
end