2023-04-04 11:24:09 -03:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								# frozen_string_literal: true
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								module DiscourseAi
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  module Summarization
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    class SummaryGenerator
							 | 
						
					
						
							
								
									
										
										
										
											2023-04-19 16:57:31 -04:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								      def initialize(target, user)
							 | 
						
					
						
							
								
									
										
										
										
											2023-04-04 11:24:09 -03:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        @target = target
							 | 
						
					
						
							
								
									
										
										
										
											2023-04-19 16:57:31 -04:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        @user = user
							 | 
						
					
						
							
								
									
										
										
										
											2023-04-04 11:24:09 -03:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								      end
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								      def summarize!(content_since)
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        content = get_content(content_since)
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2023-05-15 15:10:42 -03:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        send("#{summarization_provider}_summarization", content)
							 | 
						
					
						
							
								
									
										
										
										
											2023-04-04 11:24:09 -03:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								      end
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								      private
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2023-04-19 16:57:31 -04:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								      attr_reader :target, :user
							 | 
						
					
						
							
								
									
										
										
										
											2023-04-04 11:24:09 -03:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								      def summarization_provider
							 | 
						
					
						
							
								
									
										
										
										
											2023-04-10 11:04:42 -03:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        case model
							 | 
						
					
						
							
								
									
										
										
										
											2023-05-11 15:35:58 -03:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        in "gpt-3.5-turbo" | "gpt-4"
							 | 
						
					
						
							
								
									
										
										
										
											2023-04-10 11:04:42 -03:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								          "openai"
							 | 
						
					
						
							
								
									
										
										
										
											2023-05-11 15:35:58 -03:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        in "claude-v1" | "claude-v1-100k"
							 | 
						
					
						
							
								
									
										
										
										
											2023-04-10 11:04:42 -03:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								          "anthropic"
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        else
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								          "discourse"
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        end
							 | 
						
					
						
							
								
									
										
										
										
											2023-04-04 11:24:09 -03:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								      end
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								      def get_content(content_since)
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        case target
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        in Post
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								          target.raw
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        in Topic
							 | 
						
					
						
							
								
									
										
										
										
											2023-04-19 16:57:31 -04:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								          TopicView
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            .new(
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								              target,
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								              user,
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								              {
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                filter: "summary",
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                exclude_deleted_users: true,
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                exclude_hidden: true,
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                show_deleted: false,
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								              },
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            )
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            .posts
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            .pluck(:raw)
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            .join("\n")
							 | 
						
					
						
							
								
									
										
										
										
											2023-04-04 11:24:09 -03:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        in ::Chat::Channel
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								          target
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            .chat_messages
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            .where("chat_messages.created_at > ?", content_since.hours.ago)
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            .includes(:user)
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            .order(created_at: :asc)
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            .pluck(:username_lower, :message)
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            .map { "#{_1}: #{_2}" }
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            .join("\n")
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        else
							 | 
						
					
						
							
								
									
										
										
										
											2023-04-19 16:57:31 -04:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								          raise "Can't find content to summarize"
							 | 
						
					
						
							
								
									
										
										
										
											2023-04-04 11:24:09 -03:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        end
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								      end
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								      def discourse_summarization(content)
							 | 
						
					
						
							
								
									
										
										
										
											2023-05-15 15:10:42 -03:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        truncated_content = DiscourseAi::Tokenizer::BertTokenizer.truncate(content, max_length)
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2023-04-04 11:24:09 -03:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        ::DiscourseAi::Inference::DiscourseClassifier.perform!(
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								          "#{SiteSetting.ai_summarization_discourse_service_api_endpoint}/api/v1/classify",
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								          model,
							 | 
						
					
						
							
								
									
										
										
										
											2023-05-15 15:10:42 -03:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								          truncated_content,
							 | 
						
					
						
							
								
									
										
										
										
											2023-05-02 21:41:11 -03:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								          SiteSetting.ai_summarization_discourse_service_api_key,
							 | 
						
					
						
							
								
									
										
										
										
											2023-04-04 11:24:09 -03:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        ).dig(:summary_text)
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								      end
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								      def openai_summarization(content)
							 | 
						
					
						
							
								
									
										
										
										
											2023-05-15 15:10:42 -03:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        truncated_content =
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								          DiscourseAi::Tokenizer::OpenAiTokenizer.truncate(content, max_length - 50)
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2023-04-04 11:24:09 -03:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        messages = [{ role: "system", content: <<~TEXT }]
							 | 
						
					
						
							
								
									
										
										
										
											2023-05-15 15:10:42 -03:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								          Summarize the following article:\n\n#{truncated_content}
							 | 
						
					
						
							
								
									
										
										
										
											2023-04-04 11:24:09 -03:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        TEXT
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        ::DiscourseAi::Inference::OpenAiCompletions.perform!(messages, model).dig(
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								          :choices,
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								          0,
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								          :message,
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								          :content,
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        )
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								      end
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2023-04-10 11:04:42 -03:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								      def anthropic_summarization(content)
							 | 
						
					
						
							
								
									
										
										
										
											2023-05-15 15:10:42 -03:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        truncated_content =
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								          DiscourseAi::Tokenizer::AnthropicTokenizer.truncate(content, max_length - 50)
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2023-04-10 11:04:42 -03:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        messages =
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								          "Human: Summarize the following article that is inside <input> tags.
							 | 
						
					
						
							
								
									
										
										
										
											2023-05-15 15:10:42 -03:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								          Please include only the summary inside <ai> tags.
							 | 
						
					
						
							
								
									
										
										
										
											2023-04-10 11:04:42 -03:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2023-05-15 15:10:42 -03:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								          <input>##{truncated_content}</input>
							 | 
						
					
						
							
								
									
										
										
										
											2023-04-10 11:04:42 -03:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								          Assistant:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        "
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        response =
							 | 
						
					
						
							
								
									
										
										
										
											2023-05-11 15:35:58 -03:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								          ::DiscourseAi::Inference::AnthropicCompletions.perform!(messages, model).dig(:completion)
							 | 
						
					
						
							
								
									
										
										
										
											2023-04-10 11:04:42 -03:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        Nokogiri::HTML5.fragment(response).at("ai").text
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								      end
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2023-04-04 11:24:09 -03:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								      def model
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        SiteSetting.ai_summarization_model
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								      end
							 | 
						
					
						
							
								
									
										
										
										
											2023-04-19 16:57:31 -04:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								      def max_length
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        lengths = {
							 | 
						
					
						
							
								
									
										
										
										
											2023-05-15 15:10:42 -03:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								          "bart-large-cnn-samsum" => 1024,
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								          "flan-t5-base-samsum" => 512,
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								          "long-t5-tglobal-base-16384-book-summary" => 16_384,
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								          "gpt-3.5-turbo" => 4096,
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								          "gpt-4" => 8192,
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								          "claude-v1" => 9000,
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								          "claude-v1-100k" => 100_000,
							 | 
						
					
						
							
								
									
										
										
										
											2023-04-19 16:57:31 -04:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        }
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        lengths[model]
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								      end
							 | 
						
					
						
							
								
									
										
										
										
											2023-04-04 11:24:09 -03:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    end
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  end
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								end
							 |