From d54cd1f6024e2e5f5ae064e1e7917a021d97221b Mon Sep 17 00:00:00 2001 From: Natalie Tay Date: Wed, 9 Jul 2025 22:21:51 +0800 Subject: [PATCH] DEV: Normalize locales that are similar (e.g. en and en_GB) so they do not get translated (#1495) This commit - normalizes locales like en_GB and variants to en. With this, the feature will not translate en_GB posts to en (or similarly pt_BR to pt_PT) - consolidates the check for whether the feature is enabled into `DiscourseAi::Translation.enabled?` - does the same for the backfill check in `DiscourseAi::Translation.backfill_enabled?` - turns off backfill if `ai_translation_backfill_max_age_days` is 0, to stay true to what the setting says. Set it to a high number to backfill everything. --- app/jobs/regular/detect_translate_post.rb | 9 +-- app/jobs/regular/detect_translate_topic.rb | 9 +-- app/jobs/regular/localize_categories.rb | 9 +-- app/jobs/regular/localize_posts.rb | 22 +++---- app/jobs/regular/localize_topics.rb | 22 +++---- .../categories_locale_detection_backfill.rb | 6 +- .../category_localization_backfill.rb | 5 +- .../scheduled/post_localization_backfill.rb | 4 +- .../posts_locale_detection_backfill.rb | 14 +---- .../scheduled/topic_localization_backfill.rb | 6 +- .../topics_locale_detection_backfill.rb | 21 +++---- config/locales/server.en.yml | 2 +- lib/translation.rb | 16 +++++ lib/translation/entry_point.rb | 6 +- lib/translation/locale_normalizer.rb | 7 +++ .../regular/detect_translate_post_spec.rb | 23 +++++--- .../regular/detect_translate_topic_spec.rb | 17 +++++- spec/jobs/regular/localize_categories_spec.rb | 12 ++-- spec/jobs/regular/localize_posts_spec.rb | 59 ++++++++++++------- spec/jobs/regular/localize_topics_spec.rb | 23 ++++++-- ...tegories_locale_detection_backfill_spec.rb | 1 + .../posts_locale_detection_backfill_spec.rb | 5 +- .../topics_locale_detection_backfill_spec.rb | 5 +- spec/lib/translation/entry_point_spec.rb | 8 +++ .../lib/translation/locale_normalizer_spec.rb | 42 ++++++++++--- 25 files changed, 215 insertions(+), 138 
deletions(-) create mode 100644 lib/translation.rb diff --git a/app/jobs/regular/detect_translate_post.rb b/app/jobs/regular/detect_translate_post.rb index ec2381d2..b739989b 100644 --- a/app/jobs/regular/detect_translate_post.rb +++ b/app/jobs/regular/detect_translate_post.rb @@ -6,8 +6,7 @@ module Jobs sidekiq_options retry: false def execute(args) - return if !SiteSetting.discourse_ai_enabled - return if !SiteSetting.ai_translation_enabled + return if !DiscourseAi::Translation.enabled? return if args[:post_id].blank? post = Post.find_by(id: args[:post_id]) @@ -36,12 +35,14 @@ module Jobs end end + return if detected_locale.blank? locales = SiteSetting.content_localization_supported_locales.split("|") return if locales.blank? locales.each do |locale| - next if locale == detected_locale - next if post.post_localizations.exists?(locale:) + next if DiscourseAi::Translation::LocaleNormalizer.is_same?(locale, detected_locale) + regionless_locale = locale.split("_").first + next if post.post_localizations.where("locale LIKE ?", "#{regionless_locale}%").exists? begin DiscourseAi::Translation::PostLocalizer.localize(post, locale) diff --git a/app/jobs/regular/detect_translate_topic.rb b/app/jobs/regular/detect_translate_topic.rb index 287d5ed1..ae6972b8 100644 --- a/app/jobs/regular/detect_translate_topic.rb +++ b/app/jobs/regular/detect_translate_topic.rb @@ -6,8 +6,7 @@ module Jobs sidekiq_options retry: false def execute(args) - return if !SiteSetting.discourse_ai_enabled - return if !SiteSetting.ai_translation_enabled + return if !DiscourseAi::Translation.enabled? return if args[:topic_id].blank? topic = Topic.find_by(id: args[:topic_id]) @@ -34,12 +33,14 @@ module Jobs end end + return if detected_locale.blank? locales = SiteSetting.content_localization_supported_locales.split("|") return if locales.blank? 
locales.each do |locale| - next if locale == detected_locale - next if topic.topic_localizations.exists?(locale:) + next if DiscourseAi::Translation::LocaleNormalizer.is_same?(locale, detected_locale) + regionless_locale = locale.split("_").first + next if topic.topic_localizations.where("locale LIKE ?", "#{regionless_locale}%").exists? begin DiscourseAi::Translation::TopicLocalizer.localize(topic, locale) diff --git a/app/jobs/regular/localize_categories.rb b/app/jobs/regular/localize_categories.rb index 987cd254..8c01c6bf 100644 --- a/app/jobs/regular/localize_categories.rb +++ b/app/jobs/regular/localize_categories.rb @@ -6,21 +6,17 @@ module Jobs sidekiq_options retry: false def execute(args) + return if !DiscourseAi::Translation.enabled? + limit = args[:limit] raise Discourse::InvalidParameters.new(:limit) if limit.nil? return if limit <= 0 - - return if !SiteSetting.discourse_ai_enabled - return if !SiteSetting.ai_translation_enabled locales = SiteSetting.content_localization_supported_locales.split("|") - return if locales.blank? categories = Category.where("locale IS NOT NULL") - if SiteSetting.ai_translation_backfill_limit_to_public_content categories = categories.where(read_restricted: false) end - categories = categories.order(:id).limit(limit) return if categories.empty? @@ -33,6 +29,7 @@ module Jobs missing_locales = locales - existing_locales - [category.locale] missing_locales.each do |locale| break if remaining_limit <= 0 + next if DiscourseAi::Translation::LocaleNormalizer.is_same?(locale, category.locale) begin DiscourseAi::Translation::CategoryLocalizer.localize(category, locale) diff --git a/app/jobs/regular/localize_posts.rb b/app/jobs/regular/localize_posts.rb index 50cf4f6c..062974fd 100644 --- a/app/jobs/regular/localize_posts.rb +++ b/app/jobs/regular/localize_posts.rb @@ -9,23 +9,25 @@ module Jobs limit = args[:limit] raise Discourse::InvalidParameters.new(:limit) if limit.blank? 
|| limit <= 0 - return if !SiteSetting.discourse_ai_enabled - return if !SiteSetting.ai_translation_enabled + return if !DiscourseAi::Translation.backfill_enabled? locales = SiteSetting.content_localization_supported_locales.split("|") - return if locales.blank? - locales.each do |locale| + base_locale = locale.split("_").first posts = Post .joins( - "LEFT JOIN post_localizations pl ON pl.post_id = posts.id AND pl.locale = #{ActiveRecord::Base.connection.quote(locale)}", + "LEFT JOIN post_localizations pl ON pl.post_id = posts.id AND pl.locale LIKE '#{base_locale}%'", + ) + .where( + "posts.created_at > ?", + SiteSetting.ai_translation_backfill_max_age_days.days.ago, ) .where(deleted_at: nil) .where("posts.user_id > 0") .where.not(raw: [nil, ""]) .where.not(locale: nil) - .where.not(locale: locale) + .where("posts.locale NOT LIKE '#{base_locale}%'") .where("pl.id IS NULL") posts = posts.joins(:topic) @@ -46,14 +48,6 @@ module Jobs ) end - if SiteSetting.ai_translation_backfill_max_age_days > 0 - posts = - posts.where( - "posts.created_at > ?", - SiteSetting.ai_translation_backfill_max_age_days.days.ago, - ) - end - posts = posts.order(updated_at: :desc).limit(limit) next if posts.empty? diff --git a/app/jobs/regular/localize_topics.rb b/app/jobs/regular/localize_topics.rb index 7428eb1f..0fa334d3 100644 --- a/app/jobs/regular/localize_topics.rb +++ b/app/jobs/regular/localize_topics.rb @@ -9,22 +9,24 @@ module Jobs limit = args[:limit] raise Discourse::InvalidParameters.new(:limit) if limit.blank? || limit <= 0 - return if !SiteSetting.discourse_ai_enabled - return if !SiteSetting.ai_translation_enabled + return if !DiscourseAi::Translation.backfill_enabled? locales = SiteSetting.content_localization_supported_locales.split("|") - return if locales.blank? 
- locales.each do |locale| + base_locale = locale.split("_").first topics = Topic .joins( - "LEFT JOIN topic_localizations tl ON tl.topic_id = topics.id AND tl.locale = #{ActiveRecord::Base.connection.quote(locale)}", + "LEFT JOIN topic_localizations tl ON tl.topic_id = topics.id AND tl.locale LIKE '#{base_locale}%'", + ) + .where( + "topics.created_at > ?", + SiteSetting.ai_translation_backfill_max_age_days.days.ago, ) .where(deleted_at: nil) .where("topics.user_id > 0") .where.not(locale: nil) - .where.not(locale: locale) + .where("topics.locale NOT LIKE '#{base_locale}%'") .where("tl.id IS NULL") if SiteSetting.ai_translation_backfill_limit_to_public_content @@ -43,14 +45,6 @@ module Jobs ) end - if SiteSetting.ai_translation_backfill_max_age_days > 0 - topics = - topics.where( - "topics.created_at > ?", - SiteSetting.ai_translation_backfill_max_age_days.days.ago, - ) - end - topics = topics.order(updated_at: :desc).limit(limit) next if topics.empty? diff --git a/app/jobs/scheduled/categories_locale_detection_backfill.rb b/app/jobs/scheduled/categories_locale_detection_backfill.rb index dcfaae13..ae53817b 100644 --- a/app/jobs/scheduled/categories_locale_detection_backfill.rb +++ b/app/jobs/scheduled/categories_locale_detection_backfill.rb @@ -7,10 +7,7 @@ module Jobs cluster_concurrency 1 def execute(args) - return if !SiteSetting.discourse_ai_enabled - return if !SiteSetting.ai_translation_enabled - limit = SiteSetting.ai_translation_backfill_hourly_rate - return if limit == 0 + return if !DiscourseAi::Translation.backfill_enabled? categories = Category.where(locale: nil) @@ -18,6 +15,7 @@ module Jobs categories = categories.where(read_restricted: false) end + limit = SiteSetting.ai_translation_backfill_hourly_rate categories = categories.limit(limit) return if categories.empty? 
diff --git a/app/jobs/scheduled/category_localization_backfill.rb b/app/jobs/scheduled/category_localization_backfill.rb index cffe79a6..520b3349 100644 --- a/app/jobs/scheduled/category_localization_backfill.rb +++ b/app/jobs/scheduled/category_localization_backfill.rb @@ -6,11 +6,8 @@ module Jobs cluster_concurrency 1 def execute(args) - return if !SiteSetting.discourse_ai_enabled - return if !SiteSetting.ai_translation_enabled - return if SiteSetting.content_localization_supported_locales.blank? + return if !DiscourseAi::Translation.backfill_enabled? limit = SiteSetting.ai_translation_backfill_hourly_rate - return if limit == 0 Jobs.enqueue(:localize_categories, limit:) end diff --git a/app/jobs/scheduled/post_localization_backfill.rb b/app/jobs/scheduled/post_localization_backfill.rb index d79c6f0d..522328be 100644 --- a/app/jobs/scheduled/post_localization_backfill.rb +++ b/app/jobs/scheduled/post_localization_backfill.rb @@ -6,10 +6,8 @@ module Jobs cluster_concurrency 1 def execute(args) - return if !SiteSetting.discourse_ai_enabled - return if !SiteSetting.ai_translation_enabled + return if !DiscourseAi::Translation.backfill_enabled? - return if SiteSetting.content_localization_supported_locales.blank? limit = SiteSetting.ai_translation_backfill_hourly_rate / (60 / 5) # this job runs in 5-minute intervals return if limit == 0 diff --git a/app/jobs/scheduled/posts_locale_detection_backfill.rb b/app/jobs/scheduled/posts_locale_detection_backfill.rb index 89c6912c..0a96cd3e 100644 --- a/app/jobs/scheduled/posts_locale_detection_backfill.rb +++ b/app/jobs/scheduled/posts_locale_detection_backfill.rb @@ -7,16 +7,16 @@ module Jobs cluster_concurrency 1 def execute(args) - return if !SiteSetting.discourse_ai_enabled - return if !SiteSetting.ai_translation_enabled + return if !DiscourseAi::Translation.backfill_enabled? 
+ limit = SiteSetting.ai_translation_backfill_hourly_rate / (60 / 5) # this job runs in 5-minute intervals - return if limit == 0 posts = Post .where(locale: nil) .where(deleted_at: nil) .where("posts.user_id > 0") + .where("posts.created_at > ?", SiteSetting.ai_translation_backfill_max_age_days.days.ago) .where.not(raw: [nil, ""]) if SiteSetting.ai_translation_backfill_limit_to_public_content @@ -33,14 +33,6 @@ module Jobs ) end - if SiteSetting.ai_translation_backfill_max_age_days > 0 - posts = - posts.where( - "posts.created_at > ?", - SiteSetting.ai_translation_backfill_max_age_days.days.ago, - ) - end - posts = posts.order(updated_at: :desc).limit(limit) return if posts.empty? diff --git a/app/jobs/scheduled/topic_localization_backfill.rb b/app/jobs/scheduled/topic_localization_backfill.rb index c1bd64c0..d53e20dc 100644 --- a/app/jobs/scheduled/topic_localization_backfill.rb +++ b/app/jobs/scheduled/topic_localization_backfill.rb @@ -6,13 +6,9 @@ module Jobs cluster_concurrency 1 def execute(args) - return if !SiteSetting.discourse_ai_enabled - return if !SiteSetting.ai_translation_enabled + return if !DiscourseAi::Translation.backfill_enabled? - return if SiteSetting.content_localization_supported_locales.blank? 
limit = SiteSetting.ai_translation_backfill_hourly_rate / (60 / 5) # this job runs in 5-minute intervals - return if limit == 0 - Jobs.enqueue(:localize_topics, limit:) end end diff --git a/app/jobs/scheduled/topics_locale_detection_backfill.rb b/app/jobs/scheduled/topics_locale_detection_backfill.rb index d1bc4c35..c917ddb6 100644 --- a/app/jobs/scheduled/topics_locale_detection_backfill.rb +++ b/app/jobs/scheduled/topics_locale_detection_backfill.rb @@ -7,12 +7,15 @@ module Jobs cluster_concurrency 1 def execute(args) - return if !SiteSetting.discourse_ai_enabled - return if !SiteSetting.ai_translation_enabled - limit = SiteSetting.ai_translation_backfill_hourly_rate / (60 / 5) # this job runs in 5-minute intervals - return if limit == 0 + return if !DiscourseAi::Translation.backfill_enabled? - topics = Topic.where(locale: nil, deleted_at: nil).where("topics.user_id > 0") + limit = SiteSetting.ai_translation_backfill_hourly_rate / (60 / 5) # this job runs in 5-minute intervals + + topics = + Topic + .where(locale: nil, deleted_at: nil) + .where("topics.user_id > 0") + .where("topics.created_at > ?", SiteSetting.ai_translation_backfill_max_age_days.days.ago) if SiteSetting.ai_translation_backfill_limit_to_public_content topics = @@ -28,14 +31,6 @@ module Jobs ) end - if SiteSetting.ai_translation_backfill_max_age_days > 0 - topics = - topics.where( - "topics.created_at > ?", - SiteSetting.ai_translation_backfill_max_age_days.days.ago, - ) - end - topics = topics.order(updated_at: :desc).limit(limit) return if topics.empty? diff --git a/config/locales/server.en.yml b/config/locales/server.en.yml index 86ec5510..1126bd93 100644 --- a/config/locales/server.en.yml +++ b/config/locales/server.en.yml @@ -122,7 +122,7 @@ en: ai_translation_model: "The model to use for translation. This model must support translation. Personas can override this setting." ai_translation_backfill_limit_to_public_content: "When enabled, only content in public categories will be translated. 
When disabled, content in group PMs and private categories will also be sent for translation." ai_translation_max_post_length: "The maximum length of a post to be translated. Posts longer than this will not be translated." - ai_translation_backfill_max_age_days: "The maximum age of a post and topic to be translated. Posts and topics older than this will not be translated." + ai_translation_backfill_max_age_days: "The maximum age of a post and topic to be translated. Posts and topics older than this will not be translated. 0 disables backfilling, but will not disable translation of new posts." reviewables: reasons: diff --git a/lib/translation.rb b/lib/translation.rb new file mode 100644 index 00000000..09093f1c --- /dev/null +++ b/lib/translation.rb @@ -0,0 +1,16 @@ +# frozen_string_literal: true + +module DiscourseAi + module Translation + def self.enabled? + SiteSetting.discourse_ai_enabled && SiteSetting.ai_translation_enabled && + SiteSetting.ai_translation_model.present? && + SiteSetting.content_localization_supported_locales.present? + end + + def self.backfill_enabled? + enabled? && SiteSetting.ai_translation_backfill_hourly_rate > 0 && + SiteSetting.ai_translation_backfill_max_age_days > 0 + end + end +end diff --git a/lib/translation/entry_point.rb b/lib/translation/entry_point.rb index 418fe3d6..68447209 100644 --- a/lib/translation/entry_point.rb +++ b/lib/translation/entry_point.rb @@ -5,19 +5,19 @@ module DiscourseAi class EntryPoint def inject_into(plugin) plugin.on(:post_created) do |post| - if SiteSetting.discourse_ai_enabled && SiteSetting.ai_translation_enabled + if DiscourseAi::Translation.enabled? Jobs.enqueue(:detect_translate_post, post_id: post.id) end end plugin.on(:topic_created) do |topic| - if SiteSetting.discourse_ai_enabled && SiteSetting.ai_translation_enabled + if DiscourseAi::Translation.enabled? 
Jobs.enqueue(:detect_translate_topic, topic_id: topic.id) end end plugin.on(:post_edited) do |post, topic_changed| - if SiteSetting.discourse_ai_enabled && SiteSetting.ai_translation_enabled && topic_changed + if DiscourseAi::Translation.enabled? && topic_changed Jobs.enqueue(:detect_translate_topic, topic_id: post.topic_id) end end diff --git a/lib/translation/locale_normalizer.rb b/lib/translation/locale_normalizer.rb index 67428288..d9540dc5 100644 --- a/lib/translation/locale_normalizer.rb +++ b/lib/translation/locale_normalizer.rb @@ -15,6 +15,13 @@ module DiscourseAi locale end + def self.is_same?(locale1, locale2) + return true if locale1 == locale2 + locale1 = locale1.gsub("-", "_").downcase + locale2 = locale2.gsub("-", "_").downcase + locale1.split("_").first == locale2.split("_").first + end + private def self.i18n_pairs diff --git a/spec/jobs/regular/detect_translate_post_spec.rb b/spec/jobs/regular/detect_translate_post_spec.rb index 378d213a..570b7093 100644 --- a/spec/jobs/regular/detect_translate_post_spec.rb +++ b/spec/jobs/regular/detect_translate_post_spec.rb @@ -32,9 +32,9 @@ describe Jobs::DetectTranslatePost do end it "detects locale" do - SiteSetting.discourse_ai_enabled = true - DiscourseAi::Translation::PostLocaleDetector.expects(:detect_locale).with(post).once - DiscourseAi::Translation::PostLocalizer.expects(:localize).twice + allow(DiscourseAi::Translation::PostLocaleDetector).to receive(:detect_locale).with( + post, + ).and_return("zh_CN") job.execute({ post_id: post.id }) end @@ -42,21 +42,21 @@ describe Jobs::DetectTranslatePost do it "skips locale detection when post has a locale" do post.update!(locale: "en") DiscourseAi::Translation::PostLocaleDetector.expects(:detect_locale).with(post).never - DiscourseAi::Translation::PostLocalizer.expects(:localize).with(post, "ja").once job.execute({ post_id: post.id }) end it "skips bot posts" do post.update!(user: Discourse.system_user) + 
DiscourseAi::Translation::PostLocaleDetector.expects(:detect_locale).never DiscourseAi::Translation::PostLocalizer.expects(:localize).never job.execute({ post_id: post.id }) end - it "does not translate when no target languages are configured" do + it "skips locale detection when no target languages are configured" do SiteSetting.content_localization_supported_locales = "" - DiscourseAi::Translation::PostLocaleDetector.expects(:detect_locale).with(post).returns("en") + DiscourseAi::Translation::PostLocaleDetector.expects(:detect_locale).never DiscourseAi::Translation::PostLocalizer.expects(:localize).never job.execute({ post_id: post.id }) @@ -72,7 +72,16 @@ describe Jobs::DetectTranslatePost do it "skips translating if the post is already localized" do post.update(locale: "en") - Fabricate(:post_localization, post: post, locale: "ja") + Fabricate(:post_localization, post:, locale: "ja") + + DiscourseAi::Translation::PostLocalizer.expects(:localize).never + + job.execute({ post_id: post.id }) + end + + it "does not translate to language of similar variant" do + post.update(locale: "en_GB") + Fabricate(:post_localization, post: post, locale: "ja_JP") DiscourseAi::Translation::PostLocalizer.expects(:localize).never diff --git a/spec/jobs/regular/detect_translate_topic_spec.rb b/spec/jobs/regular/detect_translate_topic_spec.rb index bdbd4b03..80e5b8f1 100644 --- a/spec/jobs/regular/detect_translate_topic_spec.rb +++ b/spec/jobs/regular/detect_translate_topic_spec.rb @@ -33,7 +33,9 @@ describe Jobs::DetectTranslateTopic do it "detects locale" do SiteSetting.discourse_ai_enabled = true - DiscourseAi::Translation::TopicLocaleDetector.expects(:detect_locale).with(topic).once + allow(DiscourseAi::Translation::TopicLocaleDetector).to receive(:detect_locale).with( + topic, + ).and_return("zh_CN") DiscourseAi::Translation::TopicLocalizer.expects(:localize).twice job.execute({ topic_id: topic.id }) @@ -54,9 +56,9 @@ describe Jobs::DetectTranslateTopic do job.execute({ 
topic_id: topic.id }) end - it "does not translate when no target languages are configured" do + it "does not get locale or translate when no target languages are configured" do SiteSetting.content_localization_supported_locales = "" - DiscourseAi::Translation::TopicLocaleDetector.expects(:detect_locale).with(topic).returns("en") + DiscourseAi::Translation::TopicLocaleDetector.expects(:detect_locale).never DiscourseAi::Translation::TopicLocalizer.expects(:localize).never job.execute({ topic_id: topic.id }) @@ -78,6 +80,15 @@ describe Jobs::DetectTranslateTopic do job.execute({ topic_id: topic.id }) end + it "does not translate to language of similar variant" do + topic.update(locale: "en_GB") + Fabricate(:topic_localization, topic:, locale: "ja_JP") + + DiscourseAi::Translation::PostLocalizer.expects(:localize).never + + job.execute({ topic_id: topic.id }) + end + it "handles translation errors gracefully" do topic.update(locale: "en") DiscourseAi::Translation::TopicLocalizer.expects(:localize).raises( diff --git a/spec/jobs/regular/localize_categories_spec.rb b/spec/jobs/regular/localize_categories_spec.rb index 96de1faf..bbcdfc6a 100644 --- a/spec/jobs/regular/localize_categories_spec.rb +++ b/spec/jobs/regular/localize_categories_spec.rb @@ -15,7 +15,7 @@ describe Jobs::LocalizeCategories do SiteSetting.public_send("ai_translation_model=", "custom:#{fake_llm.id}") end SiteSetting.ai_translation_enabled = true - SiteSetting.content_localization_supported_locales = "pt|zh_CN" + SiteSetting.content_localization_supported_locales = "pt_BR|zh_CN" Jobs.run_immediately! 
end @@ -65,7 +65,7 @@ describe Jobs::LocalizeCategories do DiscourseAi::Translation::CategoryLocalizer .expects(:localize) - .with(is_a(Category), "pt") + .with(is_a(Category), "pt_BR") .times(number_of_categories) DiscourseAi::Translation::CategoryLocalizer .expects(:localize) @@ -92,7 +92,10 @@ describe Jobs::LocalizeCategories do it "skips categories that already have localizations" do localize_all_categories("pt", "zh_CN") - DiscourseAi::Translation::CategoryLocalizer.expects(:localize).with(is_a(Category), "pt").never + DiscourseAi::Translation::CategoryLocalizer + .expects(:localize) + .with(is_a(Category), "pt_BR") + .never DiscourseAi::Translation::CategoryLocalizer .expects(:localize) .with(is_a(Category), "zh_CN") @@ -107,7 +110,8 @@ describe Jobs::LocalizeCategories do category1 = Fabricate(:category, name: "First", description: "First description", locale: "en") DiscourseAi::Translation::CategoryLocalizer .expects(:localize) - .with(category1, "pt") + .with(category1, "pt_BR") + .once .raises(StandardError.new("API error")) DiscourseAi::Translation::CategoryLocalizer.expects(:localize).with(category1, "zh_CN").once diff --git a/spec/jobs/regular/localize_posts_spec.rb b/spec/jobs/regular/localize_posts_spec.rb index 6381f982..92aae58d 100644 --- a/spec/jobs/regular/localize_posts_spec.rb +++ b/spec/jobs/regular/localize_posts_spec.rb @@ -13,6 +13,8 @@ describe Jobs::LocalizePosts do end SiteSetting.ai_translation_enabled = true SiteSetting.content_localization_supported_locales = locales.join("|") + SiteSetting.ai_translation_backfill_hourly_rate = 100 + SiteSetting.ai_translation_backfill_max_age_days = 100 end it "does nothing when translator is disabled" do @@ -36,6 +38,13 @@ describe Jobs::LocalizePosts do job.execute({ limit: 10 }) end + it "does nothing when ai_translation_backfill_hourly_rate is 0" do + SiteSetting.ai_translation_backfill_hourly_rate = 0 + DiscourseAi::Translation::PostLocalizer.expects(:localize).never + + job.execute({ limit: 
10 }) + end + it "does nothing when there are no posts to translate" do Post.destroy_all DiscourseAi::Translation::PostLocalizer.expects(:localize).never @@ -43,20 +52,9 @@ describe Jobs::LocalizePosts do job.execute({ limit: 10 }) end - it "skips posts that already have localizations" do - Post.all.each do |post| - Fabricate(:post_localization, post:, locale: "en") - Fabricate(:post_localization, post:, locale: "ja") - end - DiscourseAi::Translation::PostLocalizer.expects(:localize).never - - job.execute({ limit: 10 }) - end - it "skips bot posts" do - post.update!(user: Discourse.system_user) - DiscourseAi::Translation::PostLocalizer.expects(:localize).with(post, "en").never - DiscourseAi::Translation::PostLocalizer.expects(:localize).with(post, "ja").never + post.update!(locale: "es", user: Discourse.system_user) + DiscourseAi::Translation::PostLocalizer.expects(:localize).never job.execute({ limit: 10 }) end @@ -90,7 +88,7 @@ describe Jobs::LocalizePosts do job.execute({ limit: 10 }) end - it "scenario 2: returns post with locale 'es' if localizations for en/ja/de do not exist" do + it "scenario 2: localizes post with locale 'es' when localizations for en/ja/de do not exist" do post = Fabricate(:post, locale: "es") DiscourseAi::Translation::PostLocalizer.expects(:localize).with(post, "en").once @@ -100,7 +98,7 @@ describe Jobs::LocalizePosts do job.execute({ limit: 10 }) end - it "scenario 3: returns post with locale 'en' if ja/de localization does not exist" do + it "scenario 3: localizes post with locale 'en' when ja/de localization do not exist" do post = Fabricate(:post, locale: "en") DiscourseAi::Translation::PostLocalizer.expects(:localize).with(post, "ja").once @@ -110,13 +108,32 @@ describe Jobs::LocalizePosts do job.execute({ limit: 10 }) end - it "scenario 4: skips post with locale 'en' if 'ja' localization already exists" do + it "scenario 4: skips post with locale 'en' if all localizations exist" do post = Fabricate(:post, locale: "en") 
Fabricate(:post_localization, post: post, locale: "ja") + Fabricate(:post_localization, post: post, locale: "de") - DiscourseAi::Translation::PostLocalizer.expects(:localize).with(post, "en").never - DiscourseAi::Translation::PostLocalizer.expects(:localize).with(post, "ja").never - DiscourseAi::Translation::PostLocalizer.expects(:localize).with(post, "de").once + DiscourseAi::Translation::PostLocalizer.expects(:localize).never + + job.execute({ limit: 10 }) + end + + it "scenario 5: skips posts that already have localizations in similar language variant" do + post = Fabricate(:post, locale: "en") + Fabricate(:post_localization, post: post, locale: "ja_JP") + Fabricate(:post_localization, post: post, locale: "de_DE") + + DiscourseAi::Translation::PostLocalizer.expects(:localize).never + + job.execute({ limit: 10 }) + end + + it "scenario 6: skips posts with variant 'en_GB' when localizations for ja/de exist" do + post = Fabricate(:post, locale: "en_GB") + Fabricate(:post_localization, post: post, locale: "ja_JP") + Fabricate(:post_localization, post: post, locale: "de_DE") + + DiscourseAi::Translation::PostLocalizer.expects(:localize).never job.execute({ limit: 10 }) end @@ -201,8 +218,8 @@ describe Jobs::LocalizePosts do job.execute({ limit: 10 }) end - it "processes all posts when setting is disabled" do - SiteSetting.ai_translation_backfill_max_age_days = 0 + it "processes all posts when setting is large" do + SiteSetting.ai_translation_backfill_max_age_days = 1000 DiscourseAi::Translation::PostLocalizer.expects(:localize).with(new_post, "ja").once diff --git a/spec/jobs/regular/localize_topics_spec.rb b/spec/jobs/regular/localize_topics_spec.rb index ae263e08..f091b915 100644 --- a/spec/jobs/regular/localize_topics_spec.rb +++ b/spec/jobs/regular/localize_topics_spec.rb @@ -13,6 +13,8 @@ describe Jobs::LocalizeTopics do end SiteSetting.ai_translation_enabled = true SiteSetting.content_localization_supported_locales = locales.join("|") + 
SiteSetting.ai_translation_backfill_hourly_rate = 100 + SiteSetting.ai_translation_backfill_max_age_days = 100 end it "does nothing when translator is disabled" do @@ -116,13 +118,22 @@ describe Jobs::LocalizeTopics do job.execute({ limit: 10 }) end - it "scenario 4: skips topic with locale 'en' if 'ja' localization already exists" do + it "scenario 4: skips topic with locale 'en' if all localizations exist" do topic = Fabricate(:topic, locale: "en") Fabricate(:topic_localization, topic: topic, locale: "ja") + Fabricate(:topic_localization, topic: topic, locale: "de") - DiscourseAi::Translation::TopicLocalizer.expects(:localize).with(topic, "en").never - DiscourseAi::Translation::TopicLocalizer.expects(:localize).with(topic, "ja").never - DiscourseAi::Translation::TopicLocalizer.expects(:localize).with(topic, "de").once + DiscourseAi::Translation::TopicLocalizer.expects(:localize).never + + job.execute({ limit: 10 }) + end + + it "scenario 5: skips topic that already have localizations in similar language variant" do + topic = Fabricate(:topic, locale: "en") + Fabricate(:topic_localization, topic: topic, locale: "ja_JP") + Fabricate(:topic_localization, topic: topic, locale: "de_DE") + + DiscourseAi::Translation::TopicLocalizer.expects(:localize).never job.execute({ limit: 10 }) end @@ -205,8 +216,8 @@ describe Jobs::LocalizeTopics do job.execute({ limit: 10 }) end - it "processes all topics when setting is disabled" do - SiteSetting.ai_translation_backfill_max_age_days = 0 + it "processes all topics when setting is more than the post age" do + SiteSetting.ai_translation_backfill_max_age_days = 100 DiscourseAi::Translation::TopicLocalizer.expects(:localize).with(new_topic, "en").once DiscourseAi::Translation::TopicLocalizer.expects(:localize).with(new_topic, "ja").once diff --git a/spec/jobs/scheduled/categories_locale_detection_backfill_spec.rb b/spec/jobs/scheduled/categories_locale_detection_backfill_spec.rb index 2ce7b066..31e96275 100644 --- 
a/spec/jobs/scheduled/categories_locale_detection_backfill_spec.rb +++ b/spec/jobs/scheduled/categories_locale_detection_backfill_spec.rb @@ -11,6 +11,7 @@ describe Jobs::CategoriesLocaleDetectionBackfill do end SiteSetting.ai_translation_enabled = true SiteSetting.ai_translation_backfill_hourly_rate = 100 + SiteSetting.content_localization_supported_locales = "en" end it "does nothing when AI is disabled" do diff --git a/spec/jobs/scheduled/posts_locale_detection_backfill_spec.rb b/spec/jobs/scheduled/posts_locale_detection_backfill_spec.rb index f78af32e..dfbc46f9 100644 --- a/spec/jobs/scheduled/posts_locale_detection_backfill_spec.rb +++ b/spec/jobs/scheduled/posts_locale_detection_backfill_spec.rb @@ -11,6 +11,7 @@ describe Jobs::PostsLocaleDetectionBackfill do end SiteSetting.ai_translation_enabled = true SiteSetting.ai_translation_backfill_hourly_rate = 100 + SiteSetting.content_localization_supported_locales = "en" end it "does nothing when translator is disabled" do @@ -137,8 +138,8 @@ describe Jobs::PostsLocaleDetectionBackfill do job.execute({}) end - it "processes all posts when setting is disabled" do - SiteSetting.ai_translation_backfill_max_age_days = 0 + it "processes all posts when setting is large" do + SiteSetting.ai_translation_backfill_max_age_days = 100 # other posts DiscourseAi::Translation::PostLocaleDetector.expects(:detect_locale).at_least_once diff --git a/spec/jobs/scheduled/topics_locale_detection_backfill_spec.rb b/spec/jobs/scheduled/topics_locale_detection_backfill_spec.rb index 285096cd..92488331 100644 --- a/spec/jobs/scheduled/topics_locale_detection_backfill_spec.rb +++ b/spec/jobs/scheduled/topics_locale_detection_backfill_spec.rb @@ -11,6 +11,7 @@ describe Jobs::TopicsLocaleDetectionBackfill do end SiteSetting.ai_translation_enabled = true SiteSetting.ai_translation_backfill_hourly_rate = 100 + SiteSetting.content_localization_supported_locales = "en" end it "does nothing when translator is disabled" do @@ -148,8 +149,8 @@ 
describe Jobs::TopicsLocaleDetectionBackfill do job.execute({ limit: 10 }) end - it "processes all topics when setting is disabled" do - SiteSetting.ai_translation_backfill_max_age_days = 0 + it "processes all topics when setting is large" do + SiteSetting.ai_translation_backfill_max_age_days = 100 DiscourseAi::Translation::TopicLocaleDetector.expects(:detect_locale).with(new_topic).once DiscourseAi::Translation::TopicLocaleDetector.expects(:detect_locale).with(old_topic).once diff --git a/spec/lib/translation/entry_point_spec.rb b/spec/lib/translation/entry_point_spec.rb index ba2c7604..a2cd00d4 100644 --- a/spec/lib/translation/entry_point_spec.rb +++ b/spec/lib/translation/entry_point_spec.rb @@ -7,6 +7,7 @@ describe DiscourseAi::Translation::EntryPoint do SiteSetting.public_send("ai_translation_model=", "custom:#{fake_llm.id}") end SiteSetting.ai_translation_enabled = true + SiteSetting.content_localization_supported_locales = "en" end describe "upon post process cooked" do @@ -56,6 +57,13 @@ describe DiscourseAi::Translation::EntryPoint do fab!(:post) { Fabricate(:post, post_number: 1) } fab!(:non_first_post) { Fabricate(:post, post_number: 2) } + before do + SiteSetting.discourse_ai_enabled = true + Fabricate(:fake_model).tap do |fake_llm| + SiteSetting.public_send("ai_translation_model=", "custom:#{fake_llm.id}") + end + end + it "enqueues detect topic locale and translate topic job" do SiteSetting.ai_translation_enabled = true topic = post.topic diff --git a/spec/lib/translation/locale_normalizer_spec.rb b/spec/lib/translation/locale_normalizer_spec.rb index 302c8e8e..772ec89f 100644 --- a/spec/lib/translation/locale_normalizer_spec.rb +++ b/spec/lib/translation/locale_normalizer_spec.rb @@ -1,14 +1,42 @@ # frozen_string_literal: true describe DiscourseAi::Translation::LocaleNormalizer do - it "matches input locales to i18n locales" do - expect(described_class.normalize_to_i18n("en-GB")).to eq("en_GB") - expect(described_class.normalize_to_i18n("en")).to 
eq("en") - expect(described_class.normalize_to_i18n("zh")).to eq("zh_CN") - expect(described_class.normalize_to_i18n("tr")).to eq("tr_TR") + describe ".normalize_to_i18n" do + it "matches input locales to i18n locales" do + expect(described_class.normalize_to_i18n("en-GB")).to eq("en_GB") + expect(described_class.normalize_to_i18n("en")).to eq("en") + expect(described_class.normalize_to_i18n("zh")).to eq("zh_CN") + expect(described_class.normalize_to_i18n("tr")).to eq("tr_TR") + end + + it "converts dashes to underscores" do + expect(described_class.normalize_to_i18n("a-b")).to eq("a_b") + end end - it "converts dashes to underscores" do - expect(described_class.normalize_to_i18n("a-b")).to eq("a_b") + describe "#is_same?" do + it "returns true for the same locale" do + expect(described_class.is_same?("en", "en")).to be true + end + + it "returns true for locales with different cases" do + expect(described_class.is_same?("en", "EN")).to be true + end + + it "returns true for locales with different separators" do + expect(described_class.is_same?("en-US", "en_US")).to be true + end + + it "returns false for different locales" do + expect(described_class.is_same?("en", "ja")).to be false + end + + it "returns true for locales with the same base language" do + expect(described_class.is_same?("zh-CN", "zh_TW")).to be true + end + + it "returns false for completely different locales" do + expect(described_class.is_same?("en", "ja")).to be false + end end end