DEV: Add topic and post id when using completions for traceability to AiApiAuditLog (#1414)

The AiApiAuditLog entry created for each translation event cannot easily be traced back to a post or topic.

This commit adds support for that by passing the topic and post ids through to the completion prompt, and also switches the translators from positional arguments to named (keyword) arguments.
This commit is contained in:
Natalie Tay 2025-06-06 23:24:24 +08:00 committed by GitHub
parent 8a3a247b11
commit 6827147362
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
11 changed files with 98 additions and 72 deletions

View File

@ -3,9 +3,11 @@
module DiscourseAi
module Translation
class BaseTranslator
def initialize(text, target_language)
def initialize(text:, target_locale:, topic_id: nil, post_id: nil)
@text = text
@target_language = target_language
@target_locale = target_locale
@topic_id = topic_id
@post_id = post_id
end
def translate
@ -13,6 +15,8 @@ module DiscourseAi
DiscourseAi::Completions::Prompt.new(
prompt_template,
messages: [{ type: :user, content: formatted_content, id: "user" }],
topic_id: @topic_id,
post_id: @post_id,
)
structured_output =
@ -27,7 +31,7 @@ module DiscourseAi
end
def formatted_content
{ content: @text, target_language: @target_language }.to_json
{ content: @text, target_locale: @target_locale }.to_json
end
def response_format

View File

@ -6,12 +6,12 @@ module DiscourseAi
def self.localize(category, target_locale = I18n.locale)
return if category.blank? || target_locale.blank?
target_locale_sym = target_locale.to_s.sub("-", "_").to_sym
target_locale = target_locale.to_s.sub("-", "_")
translated_name = ShortTextTranslator.new(category.name, target_locale_sym).translate
translated_name = ShortTextTranslator.new(text: category.name, target_locale:).translate
translated_description =
if category.description.present?
PostRawTranslator.new(category.description, target_locale_sym).translate
PostRawTranslator.new(text: category.description, target_locale:).translate
else
""
end
@ -19,7 +19,7 @@ module DiscourseAi
localization =
CategoryLocalization.find_or_initialize_by(
category_id: category.id,
locale: target_locale_sym.to_s,
locale: target_locale,
)
localization.name = translated_name

View File

@ -5,16 +5,23 @@ module DiscourseAi
class PostLocalizer
def self.localize(post, target_locale = I18n.locale)
return if post.blank? || target_locale.blank? || post.locale == target_locale.to_s
target_locale_sym = target_locale.to_s.sub("-", "_").to_sym
target_locale = target_locale.to_s.sub("-", "_")
translated_raw =
ContentSplitter
.split(post.raw)
.map { |chunk| PostRawTranslator.new(chunk, target_locale_sym).translate }
.map do |text|
PostRawTranslator.new(
text:,
target_locale:,
topic_id: post.topic_id,
post_id: post.id,
).translate
end
.join("")
localization =
PostLocalization.find_or_initialize_by(post_id: post.id, locale: target_locale_sym.to_s)
PostLocalization.find_or_initialize_by(post_id: post.id, locale: target_locale)
localization.raw = translated_raw
localization.cooked = PrettyText.cook(translated_raw)

View File

@ -21,17 +21,17 @@ module DiscourseAi
8. Ensure the translation only contains the original language and the target language.
Output your translation in the following JSON format:
{"translation": "Your TARGET_LANGUAGE translation here"}
{"translation": "Your TARGET_LOCALE translation here"}
Here are three examples of correct translations:
Original: {"content":"New Update for Minecraft Adds Underwater Temples", "target_language":"Spanish"}
Original: {"content":"New Update for Minecraft Adds Underwater Temples", "target_locale":"Spanish"}
Correct translation: {"translation": "Nueva actualización para Minecraft añade templos submarinos"}
Original: {"content": "# Machine Learning 101\n\nMachine Learning (ML) is a subset of Artificial Intelligence (AI) that focuses on the development of algorithms and statistical models that enable computer systems to improve their performance on a specific task through experience.\n\n## Key Concepts\n\n1. **Supervised Learning**: The algorithm learns from labeled training data.\n2. **Unsupervised Learning**: The algorithm finds patterns in unlabeled data.\n3. **Reinforcement Learning**: The algorithm learns through interaction with an environment.\n\n```python\n# Simple example of a machine learning model\nfrom sklearn.model_selection import train_test_split\nfrom sklearn.linear_model import LogisticRegression\n\n# Assuming X and y are your features and target variables\nX_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)\n\nmodel = LogisticRegression()\nmodel.fit(X_train, y_train)\n\n# Evaluate the model\naccuracy = model.score(X_test, y_test)\nprint(f'Model accuracy: {accuracy}')\n```\n\nFor more information, visit [Machine Learning on Wikipedia](https://en.wikipedia.org/wiki/Machine_learning).", "target_language":"French"}
Original: {"content": "# Machine Learning 101\n\nMachine Learning (ML) is a subset of Artificial Intelligence (AI) that focuses on the development of algorithms and statistical models that enable computer systems to improve their performance on a specific task through experience.\n\n## Key Concepts\n\n1. **Supervised Learning**: The algorithm learns from labeled training data.\n2. **Unsupervised Learning**: The algorithm finds patterns in unlabeled data.\n3. **Reinforcement Learning**: The algorithm learns through interaction with an environment.\n\n```python\n# Simple example of a machine learning model\nfrom sklearn.model_selection import train_test_split\nfrom sklearn.linear_model import LogisticRegression\n\n# Assuming X and y are your features and target variables\nX_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)\n\nmodel = LogisticRegression()\nmodel.fit(X_train, y_train)\n\n# Evaluate the model\naccuracy = model.score(X_test, y_test)\nprint(f'Model accuracy: {accuracy}')\n```\n\nFor more information, visit [Machine Learning on Wikipedia](https://en.wikipedia.org/wiki/Machine_learning).", "target_locale":"French"}
Correct translation: {"translation": "# Machine Learning 101\n\nLe Machine Learning (ML) est un sous-ensemble de l'Intelligence Artificielle (IA) qui se concentre sur le développement d'algorithmes et de modèles statistiques permettant aux systèmes informatiques d'améliorer leurs performances sur une tâche spécifique grâce à l'expérience.\n\n## Concepts clés\n\n1. **Apprentissage supervisé** : L'algorithme apprend à partir de données d'entraînement étiquetées.\n2. **Apprentissage non supervisé** : L'algorithme trouve des motifs dans des données non étiquetées.\n3. **Apprentissage par renforcement** : L'algorithme apprend à travers l'interaction avec un environnement.\n\n```python\n# Exemple simple d'un modèle de machine learning\nfrom sklearn.model_selection import train_test_split\nfrom sklearn.linear_model import LogisticRegression\n\n# En supposant que X et y sont vos variables de caractéristiques et cibles\nX_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)\n\nmodel = LogisticRegression()\nmodel.fit(X_train, y_train)\n\n# Évaluer le modèle\naccuracy = model.score(X_test, y_test)\nprint(f'Model accuracy: {accuracy}')\n```\n\nPour plus d'informations, visitez [Machine Learning sur Wikipedia](https://en.wikipedia.org/wiki/Machine_learning)."}
Original: {"content": "**Heathrow fechado**: paralisação de voos deve continuar nos próximos dias, diz gestora do aeroporto de *Londres*", "target_language": "English"}
Original: {"content": "**Heathrow fechado**: paralisação de voos deve continuar nos próximos dias, diz gestora do aeroporto de *Londres*", "target_locale": "English"}
Correct translation: {"translation": "**Heathrow closed**: flight disruption expected to continue in coming days, says *London* airport management"}
Remember, you are being consumed via an API. Only return the translated text in the specified JSON format. Do not include any additional information or explanations in your response.

View File

@ -15,18 +15,18 @@ module DiscourseAi
Provide your translation in the following JSON format:
<output>
{"translation": "target_language translation here"}
{"translation": "target_locale translation here"}
</output>
Here are three examples of correct translation
Original: {"content":"Japan", "target_language":"Spanish"}
Original: {"content":"Japan", "target_locale":"es"}
Correct translation: {"translation": "Japón"}
Original: {"name":"Cats and Dogs", "target_language":"Chinese"}
Original: {"name":"Cats and Dogs", "target_locale":"zh_CN"}
Correct translation: {"translation": "猫和狗"}
Original: {"name": "Q&A", "target_language": "Portuguese"}
Original: {"name": "Q&A", "target_locale": "pt"}
Correct translation: {"translation": "Perguntas e Respostas"}
Remember to keep proper nouns like "Minecraft" and "Toyota" in their original form. Translate the text now and provide your answer in the specified JSON format.

View File

@ -6,16 +6,15 @@ module DiscourseAi
def self.localize(topic, target_locale = I18n.locale)
return if topic.blank? || target_locale.blank? || topic.locale == target_locale.to_s
target_locale_sym = target_locale.to_s.sub("-", "_").to_sym
target_locale = target_locale.to_s.sub("-", "_")
translated_title = TopicTitleTranslator.new(topic.title, target_locale_sym).translate
translated_excerpt = ShortTextTranslator.new(topic.excerpt, target_locale_sym).translate
translated_title =
TopicTitleTranslator.new(text: topic.title, target_locale:, topic_id: topic.id).translate
translated_excerpt =
ShortTextTranslator.new(text: topic.excerpt, target_locale:, topic_id: topic.id).translate
localization =
TopicLocalization.find_or_initialize_by(
topic_id: topic.id,
locale: target_locale_sym.to_s,
)
TopicLocalization.find_or_initialize_by(topic_id: topic.id, locale: target_locale)
localization.title = translated_title
localization.fancy_title = Topic.fancy_title(translated_title)

View File

@ -4,9 +4,9 @@ module DiscourseAi
module Translation
class TopicTitleTranslator < BaseTranslator
PROMPT_TEMPLATE = <<~TEXT.freeze
You are a translation service specializing in translating forum post titles from English to the asked target_language. Your task is to provide accurate and contextually appropriate translations while adhering to the following guidelines:
You are a translation service specializing in translating forum post titles from English to the asked target_locale. Your task is to provide accurate and contextually appropriate translations while adhering to the following guidelines:
1. Translate the given title from English to target_language asked.
1. Translate the given title from English to target_locale asked.
2. Keep proper nouns and technical terms in their original language.
3. Attempt to keep the translated title length close to the original when possible.
4. Ensure the translation maintains the original meaning and tone.
@ -15,25 +15,25 @@ module DiscourseAi
1. Read and understand the title carefully.
2. Identify any proper nouns or technical terms that should remain untranslated.
3. Translate the remaining words and phrases into the target_language, ensuring the meaning is preserved.
3. Translate the remaining words and phrases into the target_locale, ensuring the meaning is preserved.
4. Adjust the translation if necessary to keep the length similar to the original title.
5. Review your translation for accuracy and naturalness in the target_language.
5. Review your translation for accuracy and naturalness in the target_locale.
Provide your translation in the following JSON format:
<output>
{"translation": "Your target_language translation here"}
{"translation": "Your target_locale translation here"}
</output>
Here are three examples of correct translation
Original: {"title":"New Update for Minecraft Adds Underwater Temples", "target_language":"Spanish"}
Original: {"title":"New Update for Minecraft Adds Underwater Temples", "target_locale":"es"}
Correct translation: {"translation": "Nueva actualización para Minecraft añade templos submarinos"}
Original: {"title":"Toyota announces revolutionary battery technology", "target_language":"French"}
Original: {"title":"Toyota announces revolutionary battery technology", "target_locale":"fr"}
Correct translation: {"translation": "Toyota annonce une technologie de batteries révolutionnaire"}
Original: {"title": "Heathrow fechado: paralisação de voos deve continuar nos próximos dias, diz gestora do aeroporto de Londres", "target_language": "English"}
Original: {"title": "Heathrow fechado: paralisação de voos deve continuar nos próximos dias, diz gestora do aeroporto de Londres", "target_locale": "en"}
Correct translation: {"translation": "Heathrow closed: flight disruption expected to continue in coming days, says London airport management"}
Remember to keep proper nouns like "Minecraft" and "Toyota" in their original form. Translate the title now and provide your answer in the specified JSON format.

View File

@ -10,16 +10,18 @@ describe DiscourseAi::Translation::BaseTranslator do
end
describe ".translate" do
let(:text_to_translate) { "cats are great" }
let(:target_language) { "de" }
let(:text) { "cats are great" }
let(:target_locale) { "de" }
let(:llm_response) { "hur dur hur dur!" }
it "creates the correct prompt" do
post_translator =
DiscourseAi::Translation::PostRawTranslator.new(text_to_translate, target_language)
DiscourseAi::Translation::PostRawTranslator.new(text:, target_locale:, topic_id: 1)
allow(DiscourseAi::Completions::Prompt).to receive(:new).with(
DiscourseAi::Translation::PostRawTranslator::PROMPT_TEMPLATE,
messages: [{ type: :user, content: post_translator.formatted_content, id: "user" }],
topic_id: 1,
post_id: nil,
).and_call_original
DiscourseAi::Completions::Llm.with_prepared_responses([llm_response]) do
@ -30,8 +32,7 @@ describe DiscourseAi::Translation::BaseTranslator do
it "sends the translation prompt to the selected ai helper model" do
mock_prompt = instance_double(DiscourseAi::Completions::Prompt)
mock_llm = instance_double(DiscourseAi::Completions::Llm)
post_translator =
DiscourseAi::Translation::PostRawTranslator.new(text_to_translate, target_language)
post_translator = DiscourseAi::Translation::PostRawTranslator.new(text:, target_locale:)
structured_output =
DiscourseAi::Completions::StructuredOutput.new({ translation: { type: "string" } })
@ -54,10 +55,7 @@ describe DiscourseAi::Translation::BaseTranslator do
it "returns the translation from the llm's response" do
DiscourseAi::Completions::Llm.with_prepared_responses([llm_response]) do
expect(
DiscourseAi::Translation::PostRawTranslator.new(
text_to_translate,
target_language,
).translate,
DiscourseAi::Translation::PostRawTranslator.new(text:, target_locale:).translate,
).to eq "hur dur hur dur!"
end
end

View File

@ -12,8 +12,8 @@ describe DiscourseAi::Translation::CategoryLocalizer do
def post_raw_translator_stub(opts)
mock = instance_double(DiscourseAi::Translation::PostRawTranslator)
allow(DiscourseAi::Translation::PostRawTranslator).to receive(:new).with(
opts[:value],
opts[:locale],
text: opts[:text],
target_locale: opts[:target_locale],
).and_return(mock)
allow(mock).to receive(:translate).and_return(opts[:translated])
end
@ -21,8 +21,8 @@ describe DiscourseAi::Translation::CategoryLocalizer do
def short_text_translator_stub(opts)
mock = instance_double(DiscourseAi::Translation::ShortTextTranslator)
allow(DiscourseAi::Translation::ShortTextTranslator).to receive(:new).with(
opts[:value],
opts[:locale],
text: opts[:text],
target_locale: opts[:target_locale],
).and_return(mock)
allow(mock).to receive(:translate).and_return(opts[:translated])
end
@ -32,16 +32,20 @@ describe DiscourseAi::Translation::CategoryLocalizer do
end
describe ".localize" do
let(:target_locale) { :fr }
let(:target_locale) { "fr" }
it "translates the category name and description" do
translated_cat_desc = "C'est une catégorie de test"
translated_cat_name = "Catégorie de Test"
short_text_translator_stub(
{ value: category.name, locale: target_locale, translated: translated_cat_name },
{ text: category.name, target_locale: target_locale, translated: translated_cat_name },
)
post_raw_translator_stub(
{ value: category.description, locale: target_locale, translated: translated_cat_desc },
{
text: category.description,
target_locale: target_locale,
translated: translated_cat_desc,
},
)
res = localizer.localize(category, target_locale)
@ -54,13 +58,13 @@ describe DiscourseAi::Translation::CategoryLocalizer do
translated_cat_desc = "C'est une catégorie de test"
translated_cat_name = "Catégorie de Test"
short_text_translator_stub(
{ value: category.name, locale: :fr, translated: translated_cat_name },
{ text: category.name, target_locale:, translated: translated_cat_name },
)
post_raw_translator_stub(
{ value: category.description, locale: :fr, translated: translated_cat_desc },
{ text: category.description, target_locale:, translated: translated_cat_desc },
)
res = localizer.localize(category, "fr")
res = localizer.localize(category, target_locale)
expect(res.name).to eq(translated_cat_name)
expect(res.description).to eq(translated_cat_desc)
@ -79,10 +83,10 @@ describe DiscourseAi::Translation::CategoryLocalizer do
translated_cat_desc = "C'est une catégorie de test"
translated_cat_name = "Esta es una categoría de prueba"
short_text_translator_stub(
{ value: category.name, locale: :es, translated: translated_cat_name },
{ text: category.name, target_locale: "es", translated: translated_cat_name },
)
post_raw_translator_stub(
{ value: category.description, locale: :es, translated: translated_cat_desc },
{ text: category.description, target_locale: "es", translated: translated_cat_desc },
)
res = localizer.localize(category)

View File

@ -11,8 +11,10 @@ describe DiscourseAi::Translation::PostLocalizer do
def post_raw_translator_stub(opts)
mock = instance_double(DiscourseAi::Translation::PostRawTranslator)
allow(DiscourseAi::Translation::PostRawTranslator).to receive(:new).with(
opts[:value],
opts[:locale],
text: opts[:text],
target_locale: opts[:target_locale],
post_id: opts[:post_id] || post.id,
topic_id: opts[:topic_id] || post.topic_id,
).and_return(mock)
allow(mock).to receive(:translate).and_return(opts[:translated])
end
@ -33,19 +35,19 @@ describe DiscourseAi::Translation::PostLocalizer do
end
it "translates with post and locale" do
post_raw_translator_stub({ value: post.raw, locale: :ja, translated: translated_raw })
post_raw_translator_stub({ text: post.raw, target_locale: "ja", translated: translated_raw })
described_class.localize(post, "ja")
end
it "normalizes dashes to underscores and symbol type for locale" do
post_raw_translator_stub({ value: post.raw, locale: :zh_CN, translated: "你好,世界" })
post_raw_translator_stub({ text: post.raw, target_locale: "zh_CN", translated: "你好,世界" })
described_class.localize(post, "zh-CN")
end
it "finds or creates a PostLocalization and sets its fields" do
post_raw_translator_stub({ value: post.raw, locale: :ja, translated: translated_raw })
post_raw_translator_stub({ text: post.raw, target_locale: "ja", translated: translated_raw })
expect {
res = described_class.localize(post, target_locale)
expect(res).to be_a(PostLocalization)
@ -61,7 +63,7 @@ describe DiscourseAi::Translation::PostLocalizer do
end
it "updates an existing PostLocalization if present" do
post_raw_translator_stub({ value: post.raw, locale: :ja, translated: translated_raw })
post_raw_translator_stub({ text: post.raw, target_locale: "ja", translated: translated_raw })
localization =
Fabricate(:post_localization, post: post, locale: "ja", raw: "old", cooked: "old_cooked")
expect {

View File

@ -18,8 +18,9 @@ describe DiscourseAi::Translation::TopicLocalizer do
def topic_title_translator_stub(opts)
mock = instance_double(DiscourseAi::Translation::TopicTitleTranslator)
allow(DiscourseAi::Translation::TopicTitleTranslator).to receive(:new).with(
opts[:value],
opts[:locale],
text: opts[:text],
target_locale: opts[:target_locale],
topic_id: opts[:topic_id] || topic.id,
).and_return(mock)
allow(mock).to receive(:translate).and_return(opts[:translated])
end
@ -27,8 +28,9 @@ describe DiscourseAi::Translation::TopicLocalizer do
def short_text_translator_stub(opts)
mock = instance_double(DiscourseAi::Translation::ShortTextTranslator)
allow(DiscourseAi::Translation::ShortTextTranslator).to receive(:new).with(
opts[:value],
opts[:locale],
text: opts[:text],
target_locale: opts[:target_locale],
topic_id: opts[:topic_id] || topic.id,
).and_return(mock)
allow(mock).to receive(:translate).and_return(opts[:translated])
end
@ -49,25 +51,33 @@ describe DiscourseAi::Translation::TopicLocalizer do
end
it "translates with topic and locale" do
topic_title_translator_stub({ value: topic.title, locale: :ja, translated: translated_title })
topic_title_translator_stub(
{ text: topic.title, target_locale: "ja", translated: translated_title },
)
short_text_translator_stub(
{ value: topic.excerpt, locale: :ja, translated: translated_excerpt },
{ text: topic.excerpt, target_locale: "ja", translated: translated_excerpt },
)
described_class.localize(topic, "ja")
end
it "normalizes dashes to underscores and symbol type for locale" do
topic_title_translator_stub({ value: topic.title, locale: :zh_CN, translated: "这是一个猫主题 :)" })
short_text_translator_stub({ value: topic.excerpt, locale: :zh_CN, translated: "这是一个猫主题 :)" })
topic_title_translator_stub(
{ text: topic.title, target_locale: "zh_CN", translated: "这是一个猫主题 :)" },
)
short_text_translator_stub(
{ text: topic.excerpt, target_locale: "zh_CN", translated: "这是一个猫主题 :)" },
)
described_class.localize(topic, "zh-CN")
end
it "finds or creates a TopicLocalization and sets its fields" do
topic_title_translator_stub({ value: topic.title, locale: :ja, translated: translated_title })
topic_title_translator_stub(
{ text: topic.title, target_locale: "ja", translated: translated_title },
)
short_text_translator_stub(
{ value: topic.excerpt, locale: :ja, translated: translated_excerpt },
{ text: topic.excerpt, target_locale: "ja", translated: translated_excerpt },
)
expect {
@ -85,9 +95,11 @@ describe DiscourseAi::Translation::TopicLocalizer do
end
it "updates an existing TopicLocalization if present" do
topic_title_translator_stub({ value: topic.title, locale: :ja, translated: translated_title })
topic_title_translator_stub(
{ text: topic.title, target_locale: "ja", translated: translated_title },
)
short_text_translator_stub(
{ value: topic.excerpt, locale: :ja, translated: translated_excerpt },
{ text: topic.excerpt, target_locale: "ja", translated: translated_excerpt },
)
localization =