diff --git a/.discourse-compatibility b/.discourse-compatibility index 73df02b1..1e41b6e8 100644 --- a/.discourse-compatibility +++ b/.discourse-compatibility @@ -1,3 +1,4 @@ +< 3.5.0.beta6-dev: 3e74eea1e5e3143888d67a8d8a11206df214dc24 < 3.5.0.beta3-dev: 09a68414804a1447f52e5d60691ba59742cda9ec < 3.5.0.beta2-dev: de8624416a15b3d8e7ad350b083cc1420451ccec < 3.5.0.beta1-dev: bdef136080074a993e7c4f5ca562edc31a8ba756 diff --git a/app/jobs/regular/localize_categories.rb b/app/jobs/regular/localize_categories.rb index f123fd52..f370e2e7 100644 --- a/app/jobs/regular/localize_categories.rb +++ b/app/jobs/regular/localize_categories.rb @@ -17,7 +17,8 @@ module Jobs cat_id = args[:from_category_id] || Category.order(:id).first&.id last_id = nil - categories = Category.where("id >= ?", cat_id).order(:id).limit(BATCH_SIZE) + categories = + Category.where("id >= ? AND locale IS NOT NULL", cat_id).order(:id).limit(BATCH_SIZE) return if categories.empty? categories.each do |category| @@ -26,9 +27,13 @@ module Jobs next end - CategoryLocalization.transaction do - locales.each do |locale| - next if CategoryLocalization.exists?(category_id: category.id, locale: locale) + locales.each do |locale| + localization = category.category_localizations.find_by(locale:) + + if locale == category.locale && localization + localization.destroy + else + next if locale == category.locale begin DiscourseAi::Translation::CategoryLocalizer.localize(category, locale) rescue FinalDestination::SSRFDetector::LookupFailedError diff --git a/app/jobs/scheduled/categories_locale_detection_backfill.rb b/app/jobs/scheduled/categories_locale_detection_backfill.rb new file mode 100644 index 00000000..4ea30c00 --- /dev/null +++ b/app/jobs/scheduled/categories_locale_detection_backfill.rb @@ -0,0 +1,37 @@ +# frozen_string_literal: true + +module Jobs + class CategoriesLocaleDetectionBackfill < ::Jobs::Scheduled + every 1.hour + sidekiq_options retry: false + cluster_concurrency 1 + + def execute(args) + return if !SiteSetting.discourse_ai_enabled + return if !SiteSetting.ai_translation_enabled + return if SiteSetting.ai_translation_backfill_rate == 0 + + categories = Category.where(locale: nil) + + if SiteSetting.ai_translation_backfill_limit_to_public_content + categories = categories.where(read_restricted: false) + end + + categories = categories.limit(SiteSetting.ai_translation_backfill_rate) + return if categories.empty? + + categories.each do |category| + begin + DiscourseAi::Translation::CategoryLocaleDetector.detect_locale(category) + rescue FinalDestination::SSRFDetector::LookupFailedError + rescue => e + DiscourseAi::Translation::VerboseLogger.log( + "Failed to detect category #{category.id}'s locale: #{e.message}", + ) + end + end + + DiscourseAi::Translation::VerboseLogger.log("Detected #{categories.size} category locales") + end + end +end diff --git a/lib/translation/category_locale_detector.rb b/lib/translation/category_locale_detector.rb new file mode 100644 index 00000000..544cf667 --- /dev/null +++ b/lib/translation/category_locale_detector.rb @@ -0,0 +1,19 @@ +# frozen_string_literal: true + +module DiscourseAi + module Translation + class CategoryLocaleDetector + def self.detect_locale(category) + return if category.blank? + + text = [category.name, category.description].compact.join("\n\n") + return if text.blank? + + detected_locale = LanguageDetector.new(text).detect + locale = LocaleNormalizer.normalize_to_i18n(detected_locale) + category.update_column(:locale, locale) + locale + end + end + end +end diff --git a/lib/translation/category_localizer.rb b/lib/translation/category_localizer.rb index d22a800f..1de93432 100644 --- a/lib/translation/category_localizer.rb +++ b/lib/translation/category_localizer.rb @@ -9,9 +9,12 @@ module DiscourseAi target_locale_sym = target_locale.to_s.sub("-", "_").to_sym translated_name = ShortTextTranslator.new(category.name, target_locale_sym).translate - # category descriptions are first paragraphs of posts translated_description = - PostRawTranslator.new(category.description, target_locale_sym).translate + if category.description.present? + PostRawTranslator.new(category.description, target_locale_sym).translate + else + "" + end localization = CategoryLocalization.find_or_initialize_by( diff --git a/spec/jobs/regular/localize_categories_spec.rb b/spec/jobs/regular/localize_categories_spec.rb index 5f778cc7..e0e55d18 100644 --- a/spec/jobs/regular/localize_categories_spec.rb +++ b/spec/jobs/regular/localize_categories_spec.rb @@ -53,7 +53,9 @@ describe Jobs::LocalizeCategories do end it "translates categories to the configured locales" do + Category.update_all(locale: "en") number_of_categories = Category.count + DiscourseAi::Translation::CategoryLocalizer .expects(:localize) .with(is_a(Category), "pt") @@ -69,20 +71,19 @@ describe Jobs::LocalizeCategories do it "skips categories that already have localizations" do localize_all_categories("pt", "zh_CN") - category1 = - Fabricate(:category, name: "First Category", description: "First category description") - Fabricate(:category_localization, category: category1, locale: "pt", name: "Primeira Categoria") - - # It should only translate to Chinese, not Portuguese - DiscourseAi::Translation::CategoryLocalizer.expects(:localize).with(category1, "pt").never - DiscourseAi::Translation::CategoryLocalizer.expects(:localize).with(category1, "zh_CN").once + DiscourseAi::Translation::CategoryLocalizer.expects(:localize).with(is_a(Category), "pt").never + DiscourseAi::Translation::CategoryLocalizer + .expects(:localize) + .with(is_a(Category), "zh_CN") + .never job.execute({}) end it "continues from a specified category ID" do - category1 = Fabricate(:category, name: "First", description: "First description") - category2 = Fabricate(:category, name: "Second", description: "Second description") + category1 = Fabricate(:category, name: "First", description: "First description", locale: "en") + category2 = + Fabricate(:category, name: "Second", description: "Second description", locale: "en") DiscourseAi::Translation::CategoryLocalizer .expects(:localize) @@ -99,7 +100,7 @@ describe Jobs::LocalizeCategories do it "handles translation errors gracefully" do localize_all_categories("pt", "zh_CN") - category1 = Fabricate(:category, name: "First", description: "First description") + category1 = Fabricate(:category, name: "First", description: "First description", locale: "en") DiscourseAi::Translation::CategoryLocalizer .expects(:localize) .with(category1, "pt") @@ -110,6 +111,8 @@ describe Jobs::LocalizeCategories do end it "enqueues the next batch when there are more categories" do + Category.update_all(locale: "en") + Jobs.run_later! freeze_time Jobs::LocalizeCategories.const_set(:BATCH_SIZE, 1) @@ -134,10 +137,8 @@ describe Jobs::LocalizeCategories do it "skips read-restricted categories when configured" do SiteSetting.ai_translation_backfill_limit_to_public_content = true - category1 = Fabricate(:category, name: "Public Category", read_restricted: false) - category2 = Fabricate(:category, name: "Private Category", read_restricted: true) - - DiscourseAi::Translation::CategoryLocalizer.expects(:localize).at_least_once + category1 = Fabricate(:category, name: "Public Category", read_restricted: false, locale: "en") + category2 = Fabricate(:category, name: "Private Category", read_restricted: true, locale: "en") DiscourseAi::Translation::CategoryLocalizer .expects(:localize) @@ -150,4 +151,40 @@ describe Jobs::LocalizeCategories do job.execute({}) end + + it "skips creating localizations in the same language as the category's locale" do + Category.update_all(locale: "pt") + + DiscourseAi::Translation::CategoryLocalizer.expects(:localize).with(is_a(Category), "pt").never + DiscourseAi::Translation::CategoryLocalizer + .expects(:localize) + .with(is_a(Category), "zh_CN") + .times(Category.count) + + job.execute({}) + end + + it "deletes existing localizations that match the category's locale" do + # update all categories to portuguese + Category.update_all(locale: "pt") + + localize_all_categories("pt", "zh_CN") + + expect { job.execute({}) }.to change { CategoryLocalization.exists?(locale: "pt") }.from( + true, + ).to(false) + end + + it "doesn't process categories with nil locale" do + # Add a category with nil locale + nil_locale_category = Fabricate(:category, name: "No Locale", locale: nil) + + # Make sure our query for categories with non-null locales excludes it + DiscourseAi::Translation::CategoryLocalizer + .expects(:localize) + .with(nil_locale_category, any_parameters) + .never + + job.execute({}) + end end diff --git a/spec/jobs/scheduled/categories_locale_detection_backfill_spec.rb b/spec/jobs/scheduled/categories_locale_detection_backfill_spec.rb new file mode 100644 index 00000000..5eff85fd --- /dev/null +++ b/spec/jobs/scheduled/categories_locale_detection_backfill_spec.rb @@ -0,0 +1,103 @@ +# frozen_string_literal: true + +describe Jobs::CategoriesLocaleDetectionBackfill do + fab!(:category) { Fabricate(:category, locale: nil) } + subject(:job) { described_class.new } + + before do + SiteSetting.discourse_ai_enabled = true + Fabricate(:fake_model).tap do |fake_llm| + SiteSetting.public_send("ai_translation_model=", "custom:#{fake_llm.id}") + end + SiteSetting.ai_translation_enabled = true + SiteSetting.ai_translation_backfill_rate = 100 + end + + it "does nothing when AI is disabled" do + SiteSetting.discourse_ai_enabled = false + DiscourseAi::Translation::CategoryLocaleDetector.expects(:detect_locale).never + + job.execute({}) + end + + it "does nothing when content translation is disabled" do + SiteSetting.ai_translation_enabled = false + DiscourseAi::Translation::CategoryLocaleDetector.expects(:detect_locale).never + + job.execute({}) + end + + it "does nothing when backfill rate is 0" do + SiteSetting.ai_translation_backfill_rate = 0 + DiscourseAi::Translation::CategoryLocaleDetector.expects(:detect_locale).never + + job.execute({}) + end + + it "does nothing when there are no categories to detect" do + Category.update_all(locale: "en") + DiscourseAi::Translation::CategoryLocaleDetector.expects(:detect_locale).never + + job.execute({}) + end + + it "detects locale for categories with nil locale" do + DiscourseAi::Translation::CategoryLocaleDetector.expects(:detect_locale).with(is_a(Category)).times(Category.count) + + job.execute({}) + end + + it "handles detection errors gracefully" do + DiscourseAi::Translation::CategoryLocaleDetector.expects(:detect_locale).with(is_a(Category)).at_least_once + DiscourseAi::Translation::CategoryLocaleDetector + .expects(:detect_locale) + .with(category) + .raises(StandardError.new("error")) + .once + + expect { job.execute({}) }.not_to raise_error + end + + it "logs a summary after running" do + DiscourseAi::Translation::CategoryLocaleDetector.stubs(:detect_locale) + DiscourseAi::Translation::VerboseLogger.expects(:log).with(includes("Detected #{Category.count} category locales")) + + job.execute({}) + end + + describe "with public content limitation" do + fab!(:private_category) { Fabricate(:private_category, group: Group[:staff], locale: nil) } + + before do + # catch-all for other categories + DiscourseAi::Translation::CategoryLocaleDetector.expects(:detect_locale).with(is_a(Category)).at_least_once + + SiteSetting.ai_translation_backfill_limit_to_public_content = true + end + + it "only processes public categories" do + DiscourseAi::Translation::CategoryLocaleDetector.expects(:detect_locale).with(category).once + DiscourseAi::Translation::CategoryLocaleDetector.expects(:detect_locale).with(private_category).never + + job.execute({}) + end + + it "processes all categories when setting is disabled" do + SiteSetting.ai_translation_backfill_limit_to_public_content = false + + DiscourseAi::Translation::CategoryLocaleDetector.expects(:detect_locale).with(category).once + DiscourseAi::Translation::CategoryLocaleDetector.expects(:detect_locale).with(private_category).once + + job.execute({}) + end + end + + it "limits processing to the backfill rate" do + SiteSetting.ai_translation_backfill_rate = 1 + Fabricate(:category, locale: nil) + + DiscourseAi::Translation::CategoryLocaleDetector.expects(:detect_locale).once + + job.execute({}) + end +end diff --git a/spec/lib/translation/category_locale_detector_spec.rb b/spec/lib/translation/category_locale_detector_spec.rb new file mode 100644 index 00000000..078b9ecf --- /dev/null +++ b/spec/lib/translation/category_locale_detector_spec.rb @@ -0,0 +1,42 @@ +# frozen_string_literal: true + +describe DiscourseAi::Translation::CategoryLocaleDetector do + describe ".detect_locale" do + fab!(:category) { Fabricate(:category, name: "Hello world", description: "Welcome to this category", locale: nil) } + + def language_detector_stub(opts) + mock = instance_double(DiscourseAi::Translation::LanguageDetector) + allow(DiscourseAi::Translation::LanguageDetector).to receive(:new).with( + opts[:text], + ).and_return(mock) + allow(mock).to receive(:detect).and_return(opts[:locale]) + end + + it "returns nil if category is blank" do + expect(described_class.detect_locale(nil)).to eq(nil) + end + + it "updates the category locale with the detected locale" do + text = "#{category.name}\n\n#{category.description}" + language_detector_stub({ text: text, locale: "zh_CN" }) + + expect { described_class.detect_locale(category) }.to change { category.reload.locale }.from(nil).to( + "zh_CN", + ) + end + + it "handles category with no description" do + no_description_category = Fabricate(:category, name: "Test Category", description: nil, locale: nil) + language_detector_stub({ text: no_description_category.name, locale: "fr" }) + + expect { described_class.detect_locale(no_description_category) }.to change { no_description_category.reload.locale }.from(nil).to("fr") + end + + it "bypasses validations when updating locale" do + language_detector_stub({ text: "#{category.name}\n\n#{category.description}", locale: "zh_CN" }) + + described_class.detect_locale(category) + expect(category.reload.locale).to eq("zh_CN") + end + end +end