discourse-ai/app/jobs/scheduled/posts_locale_detection_backfill.rb
Natalie Tay d54cd1f602
DEV: Normalize locales that are similar (e.g. en and en_GB) so they do not get translated (#1495)
This commit
- normalizes locales like en_GB and variants to en. With this, the feature will not translate en_GB posts to en (or similarly pt_BR to pt_PT)
- consolidates whether the feature is enabled in `DiscourseAi::Translation.enabled?`
- does the same for backfill in `DiscourseAi::Translation.backfill_enabled?`
  - turns off backfill if `ai_translation_backfill_max_age_days` is 0, so the setting does what its name says. Set it to a high number to backfill everything
2025-07-09 22:21:51 +08:00

55 lines
1.8 KiB
Ruby

# frozen_string_literal: true
module Jobs
  # Scheduled job that backfills the locale of posts that do not have one yet.
  #
  # Each run selects a bounded batch of non-deleted, non-empty posts whose
  # `locale` is NULL and that are newer than
  # `ai_translation_backfill_max_age_days`, then hands each one to
  # DiscourseAi::Translation::PostLocaleDetector.detect_locale.
  class PostsLocaleDetectionBackfill < ::Jobs::Scheduled
    # Run interval in minutes. Used both for the schedule and to convert the
    # configured hourly rate into a per-run batch size, so the two cannot drift.
    RUN_INTERVAL_MINUTES = 5

    every RUN_INTERVAL_MINUTES.minutes
    sidekiq_options retry: false
    cluster_concurrency 1

    # Runs one backfill pass.
    #
    # @param args [Hash] job arguments; not used by this job
    # @return [void]
    def execute(args)
      return if !DiscourseAi::Translation.backfill_enabled?

      # Spread the hourly budget evenly over the runs that happen in one hour.
      # Integer division truncates, so (assuming the setting is an Integer) an
      # hourly rate below 60 / RUN_INTERVAL_MINUTES yields a batch size of 0.
      limit = SiteSetting.ai_translation_backfill_hourly_rate / (60 / RUN_INTERVAL_MINUTES)
      return if limit <= 0

      # Load the batch once: calling empty? and then each on an unloaded
      # relation would issue a separate existence query before the real load.
      posts = posts_missing_locale.order(updated_at: :desc).limit(limit).to_a
      return if posts.empty?

      # Only count calls that completed without raising, so the log does not
      # over-report when some detections fail.
      detected = posts.count { |post| attempt_locale_detection(post) }
      DiscourseAi::Translation::VerboseLogger.log("Detected #{detected} post locales")
    end

    private

    # Builds the relation of posts eligible for locale detection (unordered,
    # unlimited).
    #
    # @return [ActiveRecord::Relation]
    def posts_missing_locale
      posts =
        Post
          .where(locale: nil)
          .where(deleted_at: nil)
          # presumably skips system/bot authors, which use non-positive ids -- confirm
          .where("posts.user_id > 0")
          .where("posts.created_at > ?", SiteSetting.ai_translation_backfill_max_age_days.days.ago)
          .where.not(raw: [nil, ""])

      if SiteSetting.ai_translation_backfill_limit_to_public_content
        # Public content only: topics in non-read-restricted categories that
        # are not private messages.
        posts
          .joins(:topic)
          .where(topics: { category_id: Category.where(read_restricted: false).select(:id) })
          .where("topics.archetype != ?", Archetype.private_message)
      else
        # Everything except private messages that have no allowed group.
        posts.joins(:topic).where(
          "topics.archetype != ? OR EXISTS (SELECT 1 FROM topic_allowed_groups WHERE topic_id = topics.id)",
          Archetype.private_message,
        )
      end
    end

    # Runs locale detection for a single post, containing any failure so one
    # bad post does not abort the rest of the batch.
    #
    # @param post [Post]
    # @return [Boolean] true when detect_locale returned without raising
    def attempt_locale_detection(post)
      DiscourseAi::Translation::PostLocaleDetector.detect_locale(post)
      true
    rescue FinalDestination::SSRFDetector::LookupFailedError
      # do nothing, there are too many sporadic lookup failures
      false
    rescue => e
      # backtrace can be nil, so guard before slicing the first four frames
      DiscourseAi::Translation::VerboseLogger.log(
        "Failed to detect post #{post.id}'s locale: #{e.message}\n\n#{e.backtrace&.first(4)&.join("\n")}",
      )
      false
    end
  end
end