FEATURE: Auto image captions (#637)

This commit is contained in:
Keegan George 2024-05-27 10:49:24 -07:00 committed by GitHub
parent baf88e7cfc
commit a1c649965f
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
19 changed files with 526 additions and 14 deletions

View File

@ -107,20 +107,50 @@ module DiscourseAi
status: 502 status: 502
end end
def random_caption
captions = [
"A beautiful landscape",
"An adorable puppy",
"A delicious meal",
"A cozy fireplace",
"A stunning sunset",
"A charming cityscape",
"A peaceful garden",
"A majestic mountain range",
"A captivating work of art",
]
captions.sample
end
def caption_image def caption_image
image_url = params[:image_url] image_url = params[:image_url]
raise Discourse::InvalidParameters.new(:image_url) if !image_url image_url_type = params[:image_url_type]
raise Discourse::InvalidParameters.new(:image_url) if !image_url
raise Discourse::InvalidParameters.new(:image_url) if !image_url_type
if image_url_type == "short_path"
image = Upload.find_by(sha1: Upload.sha1_from_short_path(image_url))
elsif image_url_type == "short_url"
image = Upload.find_by(sha1: Upload.sha1_from_short_url(image_url))
else
image = upload_from_full_url(image_url)
end
image = upload_from_full_url(image_url)
raise Discourse::NotFound if image.blank? raise Discourse::NotFound if image.blank?
final_image_url = get_caption_url(image, image_url) final_image_url = get_caption_url(image, image_url)
hijack do hijack do
caption = if Rails.env.development?
DiscourseAi::AiHelper::Assistant.new.generate_image_caption( sleep 2 # Simulate a delay of 2 seconds
final_image_url, caption = random_caption
current_user, else
) caption =
DiscourseAi::AiHelper::Assistant.new.generate_image_caption(
final_image_url,
current_user,
)
end
render json: { render json: {
caption: caption:
"#{caption} (#{I18n.t("discourse_ai.ai_helper.image_caption.attribution")})", "#{caption} (#{I18n.t("discourse_ai.ai_helper.image_caption.attribution")})",

View File

@ -0,0 +1,19 @@
import Component from "@glimmer/component";
import { service } from "@ember/service";
import loadingSpinner from "discourse/helpers/loading-spinner";
import i18n from "discourse-common/helpers/i18n";
export default class AiImageCaptionLoader extends Component {
@service imageCaptionPopup;
<template>
{{#if this.imageCaptionPopup.showAutoCaptionLoader}}
<div class="auto-image-caption-loader">
{{loadingSpinner size="small"}}
<span>{{i18n
"discourse_ai.ai_helper.image_caption.automatic_caption_loading"
}}</span>
</div>
{{/if}}
</template>
}

View File

@ -0,0 +1,19 @@
import Component from "@glimmer/component";
import { LinkTo } from "@ember/routing";
import dIcon from "discourse-common/helpers/d-icon";
import i18n from "discourse-common/helpers/i18n";
export default class AutoImageCaptionSetting extends Component {
static shouldRender(outletArgs, helper) {
return helper.siteSettings.discourse_ai_enabled;
}
<template>
<li class="user-nav__preferences-ai">
<LinkTo @route="preferences.ai">
{{dIcon "discourse-sparkles"}}
<span>{{i18n "discourse_ai.title"}}</span>
</LinkTo>
</li>
</template>
}

View File

@ -0,0 +1,37 @@
import { tracked } from "@glimmer/tracking";
import Controller from "@ember/controller";
import { action } from "@ember/object";
import { service } from "@ember/service";
import { popupAjaxError } from "discourse/lib/ajax-error";
import { isTesting } from "discourse-common/config/environment";
const AI_ATTRS = ["auto_image_caption"];
export default class PreferencesAiController extends Controller {
@service siteSettings;
@tracked saved = false;
get canToggleAutoImageCaption() {
const userGroups = this.model.groups.map((g) => g.id);
const captionGroups = this.siteSettings.ai_auto_image_caption_allowed_groups
.split("|")
.map((id) => parseInt(id, 10));
return userGroups.some((groupId) => captionGroups.includes(groupId));
}
@action
save() {
this.saved = false;
return this.model
.save(AI_ATTRS)
.then(() => {
this.saved = true;
if (!isTesting()) {
location.reload();
}
})
.catch(popupAjaxError);
}
}

View File

@ -0,0 +1,7 @@
export default {
resource: "user.preferences",
map() {
this.route("ai");
},
};

View File

@ -0,0 +1,15 @@
import { service } from "@ember/service";
import { defaultHomepage } from "discourse/lib/utilities";
import RestrictedUserRoute from "discourse/routes/restricted-user";
export default class PreferencesAiRoute extends RestrictedUserRoute {
@service siteSettings;
setupController(controller, user) {
if (!this.siteSettings.discourse_ai_enabled) {
return this.router.transitionTo(`discovery.${defaultHomepage()}`);
}
controller.set("model", user);
}
}

View File

@ -12,6 +12,7 @@ export default class ImageCaptionPopup extends Service {
@tracked newCaption = null; @tracked newCaption = null;
@tracked loading = false; @tracked loading = false;
@tracked popupTrigger = null; @tracked popupTrigger = null;
@tracked showAutoCaptionLoader = false;
@tracked _request = null; @tracked _request = null;
updateCaption() { updateCaption() {

View File

@ -0,0 +1,24 @@
<label class="control-label">{{i18n "discourse_ai.title"}}</label>
{{#if this.canToggleAutoImageCaption}}
<div class="control-group ai-setting">
<PreferenceCheckbox
@labelKey="discourse_ai.ai_helper.image_caption.automatic_caption_setting"
@checked={{this.model.user_option.auto_image_caption}}
data-setting-name="auto-image-caption"
class="pref-auto-image-caption"
/>
</div>
<SaveControls
@id="user_ai_preference_save"
@model={{this.model}}
@action={{this.save}}
@saved={{this.saved}}
/>
{{else}}
<EmptyState
@title={{i18n "discourse_ai.user_preferences.empty_state.title"}}
@body={{i18n "discourse_ai.user_preferences.empty_state.body"}}
/>
{{/if}}

View File

@ -1,7 +1,9 @@
import { ajax } from "discourse/lib/ajax"; import { ajax } from "discourse/lib/ajax";
import { popupAjaxError } from "discourse/lib/ajax-error"; import { popupAjaxError } from "discourse/lib/ajax-error";
import { apiInitializer } from "discourse/lib/api"; import { apiInitializer } from "discourse/lib/api";
import { getUploadMarkdown, isImage } from "discourse/lib/uploads";
import I18n from "discourse-i18n"; import I18n from "discourse-i18n";
import { IMAGE_MARKDOWN_REGEX } from "../discourse/lib/utilities";
export default apiInitializer("1.25.0", (api) => { export default apiInitializer("1.25.0", (api) => {
const buttonAttrs = { const buttonAttrs = {
@ -19,6 +21,8 @@ export default apiInitializer("1.25.0", (api) => {
return; return;
} }
api.addSaveableUserOptionField("auto_image_caption");
api.addComposerImageWrapperButton( api.addComposerImageWrapperButton(
buttonAttrs.label, buttonAttrs.label,
buttonAttrs.class, buttonAttrs.class,
@ -56,6 +60,7 @@ export default apiInitializer("1.25.0", (api) => {
method: "POST", method: "POST",
data: { data: {
image_url: imageSrc, image_url: imageSrc,
image_url_type: "long_url",
}, },
} }
); );
@ -78,4 +83,154 @@ export default apiInitializer("1.25.0", (api) => {
} }
} }
); );
// Checks if image is small (≤ 0.4 MP)
function isSmallImage(width, height) {
const megapixels = (width * height) / 1000000;
return megapixels <= 0.4;
}
function needsImprovedCaption(caption) {
return caption.length < 20 || caption.split(" ").length === 1;
}
function getUploadUrlFromMarkdown(markdown) {
const regex = /\(upload:\/\/([^)]+)\)/;
const match = markdown.match(regex);
return match ? `upload://${match[1]}` : null;
}
async function fetchImageCaption(imageUrl, urlType) {
try {
const response = await ajax(`/discourse-ai/ai-helper/caption_image`, {
method: "POST",
data: {
image_url: imageUrl,
image_url_type: urlType,
},
});
return response.caption;
} catch (error) {
popupAjaxError(error);
}
}
const autoCaptionAllowedGroups =
settings?.ai_auto_image_caption_allowed_groups
.split("|")
.map((id) => parseInt(id, 10));
const currentUserGroups = currentUser.groups.map((g) => g.id);
if (
!currentUserGroups.some((groupId) =>
autoCaptionAllowedGroups.includes(groupId)
)
) {
return;
}
// Automatically caption uploaded images
api.addComposerUploadMarkdownResolver(async (upload) => {
const autoCaptionEnabled = currentUser.get(
"user_option.auto_image_caption"
);
if (
!autoCaptionEnabled ||
!isImage(upload.url) ||
!needsImprovedCaption(upload.original_filename) ||
isSmallImage(upload.width, upload.height)
) {
return getUploadMarkdown(upload);
}
const caption = await fetchImageCaption(upload.url, "long_url");
return `![${caption}|${upload.thumbnail_width}x${upload.thumbnail_height}](${upload.short_url})`;
});
// Conditionally show dialog to auto image caption
api.composerBeforeSave(() => {
return new Promise((resolve, reject) => {
const dialog = api.container.lookup("service:dialog");
const composer = api.container.lookup("service:composer");
const localePrefix =
"discourse_ai.ai_helper.image_caption.automatic_caption_dialog";
const autoCaptionEnabled = currentUser.get(
"user_option.auto_image_caption"
);
const imageUploads = composer.model.reply.match(IMAGE_MARKDOWN_REGEX);
const hasImageUploads = imageUploads?.length > 0;
const imagesToCaption = imageUploads.filter((image) => {
const caption = image
.substring(image.indexOf("[") + 1, image.indexOf("]"))
.split("|")[0];
// We don't check if the image is small to show the prompt here
// because the width/height are the thumbnail sizes so the mp count
// is incorrect. It doesn't matter because the auto caption won't
// happen anyways if its small because that uses the actual upload dimensions
return needsImprovedCaption(caption);
});
const needsBetterCaptions = imagesToCaption?.length > 0;
const keyValueStore = api.container.lookup("service:key-value-store");
const imageCaptionPopup = api.container.lookup(
"service:imageCaptionPopup"
);
const autoCaptionPromptKey = "ai-auto-caption-seen";
const seenAutoCaptionPrompt = keyValueStore.getItem(autoCaptionPromptKey);
if (
autoCaptionEnabled ||
!hasImageUploads ||
!needsBetterCaptions ||
seenAutoCaptionPrompt
) {
return resolve();
}
keyValueStore.setItem(autoCaptionPromptKey, true);
dialog.confirm({
message: I18n.t(`${localePrefix}.prompt`),
confirmButtonLabel: `${localePrefix}.confirm`,
cancelButtonLabel: `${localePrefix}.cancel`,
class: "ai-image-caption-prompt-dialog",
didConfirm: async () => {
try {
currentUser.set("user_option.auto_image_caption", true);
await currentUser.save(["auto_image_caption"]);
imagesToCaption.forEach(async (imageMarkdown) => {
const uploadUrl = getUploadUrlFromMarkdown(imageMarkdown);
imageCaptionPopup.showAutoCaptionLoader = true;
const caption = await fetchImageCaption(uploadUrl, "short_url");
// Find and replace the caption in the reply
const regex = new RegExp(
`(!\\[)[^|]+(\\|[^\\]]+\\]\\(${uploadUrl}\\))`
);
const newReply = composer.model.reply.replace(
regex,
`$1${caption}$2`
);
composer.model.set("reply", newReply);
imageCaptionPopup.showAutoCaptionLoader = false;
resolve();
});
} catch (error) {
// Reject the promise if an error occurs
// Show an error saying unable to generate captions
reject(error);
}
},
didCancel: () => {
// Don't enable auto captions and continue with the save
resolve();
},
});
});
});
}); });

View File

@ -569,3 +569,17 @@
border-right-color: var(--tertiary); border-right-color: var(--tertiary);
} }
} }
.ai-image-caption-prompt-dialog {
.dialog-content {
max-width: 555px;
}
}
.auto-image-caption-loader {
margin-left: 2rem;
display: flex;
align-items: center;
gap: 0.5rem;
color: var(--primary-high);
}

View File

@ -118,6 +118,12 @@ en:
discourse_ai: discourse_ai:
title: "AI" title: "AI"
user_preferences:
empty_state:
title: "No AI specific user preferences available."
body: "There are currently no user preferences related to AI that are available for you to toggle."
modals: modals:
select_option: "Select an option..." select_option: "Select an option..."
@ -211,7 +217,7 @@ en:
edit: "Edit" edit: "Edit"
saved: "LLM Model Saved" saved: "LLM Model Saved"
back: "Back" back: "Back"
tests: tests:
title: "Run Test" title: "Run Test"
running: "Running test..." running: "Running test..."
success: "Success!" success: "Success!"
@ -277,6 +283,12 @@ en:
generating: "Generating caption..." generating: "Generating caption..."
credits: "Captioned by AI" credits: "Captioned by AI"
save_caption: "Save" save_caption: "Save"
automatic_caption_setting: "Enable automatic AI image captions"
automatic_caption_loading: "Captioning images..."
automatic_caption_dialog:
prompt: "This post contains non-captioned images. Would you like to enable automatic AI captions on image uploads? (This can be changed in your preferences later)"
confirm: "Enable"
cancel: "Don't ask again"
reviewables: reviewables:
model_used: "Model used:" model_used: "Model used:"

View File

@ -75,6 +75,7 @@ en:
ai_helper_enabled_features: "Select the features to enable in the AI helper." ai_helper_enabled_features: "Select the features to enable in the AI helper."
post_ai_helper_allowed_groups: "User groups allowed to access AI Helper features in posts" post_ai_helper_allowed_groups: "User groups allowed to access AI Helper features in posts"
ai_helper_image_caption_model: "Select the model to use for generating image captions" ai_helper_image_caption_model: "Select the model to use for generating image captions"
ai_auto_image_caption_allowed_groups: "Users on these groups can toggle automatic image captioning."
ai_embeddings_enabled: "Enable the embeddings module." ai_embeddings_enabled: "Enable the embeddings module."
ai_embeddings_discourse_service_api_endpoint: "URL where the API is running for the embeddings module" ai_embeddings_discourse_service_api_endpoint: "URL where the API is running for the embeddings module"

View File

@ -54,3 +54,10 @@ Discourse::Application.routes.draw do
end end
end end
end end
Discourse::Application.routes.append do
get "u/:username/preferences/ai" => "users#preferences",
:constraints => {
username: RouteFormat.username,
}
end

View File

@ -249,6 +249,13 @@ discourse_ai:
choices: choices:
- "llava" - "llava"
- "open_ai:gpt-4-vision-preview" - "open_ai:gpt-4-vision-preview"
ai_auto_image_caption_allowed_groups:
client: true
type: group_list
list_type: compact
default: "10" # 10: @trust_level_0
allow_any: false
refresh: true
ai_embeddings_enabled: ai_embeddings_enabled:
default: false default: false

View File

@ -0,0 +1,7 @@
# frozen_string_literal: true
class AddAutoImageCaptionToUserOptions < ActiveRecord::Migration[7.0]
def change
add_column :user_options, :auto_image_caption, :boolean, default: false, null: false
end
end

View File

@ -42,6 +42,17 @@ module DiscourseAi
root: false, root: false,
) )
end end
UserUpdater::OPTION_ATTR.push(:auto_image_caption)
plugin.add_to_serializer(
:user_option,
:auto_image_caption,
include_condition: -> do
SiteSetting.composer_ai_helper_enabled &&
SiteSetting.ai_helper_enabled_features.include?("image_caption") &&
scope.user.in_any_groups?(SiteSetting.ai_auto_image_caption_allowed_groups_map)
end,
) { object.auto_image_caption }
end end
end end
end end

View File

@ -131,16 +131,54 @@ RSpec.describe DiscourseAi::AiHelper::AssistantController do
end end
it "returns the suggested caption for the image" do it "returns the suggested caption for the image" do
post "/discourse-ai/ai-helper/caption_image", params: { image_url: image_url } post "/discourse-ai/ai-helper/caption_image",
params: {
image_url: image_url,
image_url_type: "long_url",
}
expect(response.status).to eq(200) expect(response.status).to eq(200)
expect(response.parsed_body["caption"]).to eq(caption_with_attrs) expect(response.parsed_body["caption"]).to eq(caption_with_attrs)
end end
context "when the image_url is a short_url" do
let(:image_url) { upload.short_url }
it "returns the suggested caption for the image" do
post "/discourse-ai/ai-helper/caption_image",
params: {
image_url: image_url,
image_url_type: "short_url",
}
expect(response.status).to eq(200)
expect(response.parsed_body["caption"]).to eq(caption_with_attrs)
end
end
context "when the image_url is a short_path" do
let(:image_url) { "#{Discourse.base_url}#{upload.short_path}" }
it "returns the suggested caption for the image" do
post "/discourse-ai/ai-helper/caption_image",
params: {
image_url: image_url,
image_url_type: "short_path",
}
expect(response.status).to eq(200)
expect(response.parsed_body["caption"]).to eq(caption_with_attrs)
end
end
it "returns a 502 error when the completion call fails" do it "returns a 502 error when the completion call fails" do
stub_request(:post, "https://example.com/predictions").to_return(status: 502) stub_request(:post, "https://example.com/predictions").to_return(status: 502)
post "/discourse-ai/ai-helper/caption_image", params: { image_url: image_url } post "/discourse-ai/ai-helper/caption_image",
params: {
image_url: image_url,
image_url_type: "long_url",
}
expect(response.status).to eq(502) expect(response.status).to eq(502)
end end
@ -155,6 +193,7 @@ RSpec.describe DiscourseAi::AiHelper::AssistantController do
post "/discourse-ai/ai-helper/caption_image", post "/discourse-ai/ai-helper/caption_image",
params: { params: {
image_url: "http://blah.com/img.jpeg", image_url: "http://blah.com/img.jpeg",
image_url_type: "long_url",
} }
expect(response.status).to eq(404) expect(response.status).to eq(404)
@ -172,13 +211,21 @@ RSpec.describe DiscourseAi::AiHelper::AssistantController do
before { enable_secure_uploads } before { enable_secure_uploads }
it "returns a 403 error if the user cannot access the secure upload" do it "returns a 403 error if the user cannot access the secure upload" do
post "/discourse-ai/ai-helper/caption_image", params: { image_url: image_url } post "/discourse-ai/ai-helper/caption_image",
params: {
image_url: image_url,
image_url_type: "long_url",
}
expect(response.status).to eq(403) expect(response.status).to eq(403)
end end
it "returns a 200 message and caption if user can access the secure upload" do it "returns a 200 message and caption if user can access the secure upload" do
group.add(user) group.add(user)
post "/discourse-ai/ai-helper/caption_image", params: { image_url: image_url } post "/discourse-ai/ai-helper/caption_image",
params: {
image_url: image_url,
image_url_type: "long_url",
}
expect(response.status).to eq(200) expect(response.status).to eq(200)
expect(response.parsed_body["caption"]).to eq(caption_with_attrs) expect(response.parsed_body["caption"]).to eq(caption_with_attrs)
end end
@ -188,7 +235,11 @@ RSpec.describe DiscourseAi::AiHelper::AssistantController do
it "creates a signed URL properly and makes the caption" do it "creates a signed URL properly and makes the caption" do
group.add(user) group.add(user)
post "/discourse-ai/ai-helper/caption_image", params: { image_url: image_url } post "/discourse-ai/ai-helper/caption_image",
params: {
image_url: image_url,
image_url_type: "long_url",
}
expect(response.status).to eq(200) expect(response.status).to eq(200)
expect(response.parsed_body["caption"]).to eq(caption_with_attrs) expect(response.parsed_body["caption"]).to eq(caption_with_attrs)
end end

View File

@ -3,9 +3,10 @@
RSpec.describe "AI image caption", type: :system, js: true do RSpec.describe "AI image caption", type: :system, js: true do
fab!(:user) { Fabricate(:admin, refresh_auto_groups: true) } fab!(:user) { Fabricate(:admin, refresh_auto_groups: true) }
fab!(:non_member_group) { Fabricate(:group) } fab!(:non_member_group) { Fabricate(:group) }
let(:user_preferences_ai_page) { PageObjects::Pages::UserPreferencesAi.new }
let(:composer) { PageObjects::Components::Composer.new } let(:composer) { PageObjects::Components::Composer.new }
let(:popup) { PageObjects::Components::AiCaptionPopup.new } let(:popup) { PageObjects::Components::AiCaptionPopup.new }
let(:dialog) { PageObjects::Components::Dialog.new }
let(:file_path) { file_from_fixtures("logo.jpg", "images").path } let(:file_path) { file_from_fixtures("logo.jpg", "images").path }
let(:caption) do let(:caption) do
"The image shows a stylized speech bubble icon with a multicolored border on a black background." "The image shows a stylized speech bubble icon with a multicolored border on a black background."
@ -80,4 +81,74 @@ RSpec.describe "AI image caption", type: :system, js: true do
expect(page.find(".image-wrapper img")["alt"]).to eq(caption_with_attrs) expect(page.find(".image-wrapper img")["alt"]).to eq(caption_with_attrs)
end end
end end
describe "automatic image captioning" do
context "when toggling the setting from the user preferences page" do
before { user.user_option.update!(auto_image_caption: false) }
it "should update the preference to enabled" do
user_preferences_ai_page.visit(user)
user_preferences_ai_page.toggle_setting("pref-auto-image-caption")
user_preferences_ai_page.save_changes
wait_for(timeout: 5) { user.reload.user_option.auto_image_caption }
expect(user.reload.user_option.auto_image_caption).to eq(true)
end
end
context "when the user preference is disabled" do
before { user.user_option.update!(auto_image_caption: false) }
it "should show a prompt when submitting a post with captionable images uploaded" do
visit("/latest")
page.find("#create-topic").click
attach_file([file_path]) { composer.click_toolbar_button("upload") }
wait_for { composer.has_no_in_progress_uploads? }
composer.fill_title("I love using Discourse! It is my favorite forum software")
composer.create
expect(dialog).to be_open
end
it "should not show a prompt when submitting a post with no captionable images uploaded" do
original_file_path = Rails.root.join("spec/fixtures/images/logo.jpg")
temp_file_path = Rails.root.join("spec/fixtures/images/An image of Discourse logo.jpg")
FileUtils.cp(original_file_path, temp_file_path)
visit("/latest")
page.find("#create-topic").click
attach_file([temp_file_path]) { composer.click_toolbar_button("upload") }
wait_for { composer.has_no_in_progress_uploads? }
composer.fill_title("I love using Discourse! It is my favorite forum software")
composer.create
expect(dialog).to be_closed
end
it "should auto caption the existing images and update the preference when dialog is accepted" do
visit("/latest")
page.find("#create-topic").click
attach_file([file_path]) { composer.click_toolbar_button("upload") }
wait_for { composer.has_no_in_progress_uploads? }
composer.fill_title("I love using Discourse! It is my favorite forum software")
composer.create
dialog.click_yes
wait_for(timeout: 100) { page.find("#post_1 .cooked img")["alt"] == caption_with_attrs }
expect(page.find("#post_1 .cooked img")["alt"]).to eq(caption_with_attrs)
end
end
context "when the user preference is enabled" do
before { user.user_option.update!(auto_image_caption: true) }
skip "TODO: Fix auto_image_caption user option not present in testing environment?" do
it "should auto caption the image after uploading" do
visit("/latest")
page.find("#create-topic").click
attach_file([Rails.root.join("spec/fixtures/images/logo.jpg")]) do
composer.click_toolbar_button("upload")
end
wait_for { composer.has_no_in_progress_uploads? }
wait_for { page.find(".image-wrapper img")["alt"] == caption_with_attrs }
expect(page.find(".image-wrapper img")["alt"]).to eq(caption_with_attrs)
end
end
end
end
end end

View File

@ -0,0 +1,24 @@
# frozen_string_literal: true
module PageObjects
module Pages
class UserPreferencesAi < PageObjects::Pages::Base
def visit(user)
page.visit("/u/#{user.username}/preferences/ai")
self
end
def has_ai_preference_checked?(preference)
page.find(".#{preference} input").checked?
end
def toggle_setting(preference)
page.find(".#{preference} input").click
end
def save_changes
page.find(".save-changes").click
end
end
end
end