| 
									
										
										
										
											2023-06-27 12:26:33 -03:00
										 |  |  | # frozen_string_literal: true | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | module DiscourseAi | 
					
						
							|  |  |  |   module Summarization | 
					
						
							|  |  |  |     module Models | 
					
						
							|  |  |  |       class Anthropic < Base | 
					
						
							|  |  |  |         def display_name | 
					
						
							|  |  |  |           "Anthropic's #{model}" | 
					
						
							|  |  |  |         end | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         def correctly_configured? | 
					
						
							|  |  |  |           SiteSetting.ai_anthropic_api_key.present? | 
					
						
							|  |  |  |         end | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         def configuration_hint | 
					
						
							|  |  |  |           I18n.t( | 
					
						
							|  |  |  |             "discourse_ai.summarization.configuration_hint", | 
					
						
							|  |  |  |             count: 1, | 
					
						
							|  |  |  |             setting: "ai_anthropic_api_key", | 
					
						
							|  |  |  |           ) | 
					
						
							|  |  |  |         end | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-08-11 15:08:54 -03:00
										 |  |  |         def concatenate_summaries(summaries, &on_partial_blk) | 
					
						
							| 
									
										
										
										
											2023-06-27 12:26:33 -03:00
										 |  |  |           instructions = <<~TEXT | 
					
						
							|  |  |  |             Human: Concatenate the following disjoint summaries inside the given input tags, creating a cohesive narrative. | 
					
						
							|  |  |  |             Include only the summary inside <ai> tags. | 
					
						
							|  |  |  |           TEXT | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |           instructions += summaries.reduce("") { |m, s| m += "<input>#{s}</input>\n" } | 
					
						
							|  |  |  |           instructions += "Assistant:\n" | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-08-11 15:08:54 -03:00
										 |  |  |           completion(instructions, &on_partial_blk) | 
					
						
							| 
									
										
										
										
											2023-06-27 12:26:33 -03:00
										 |  |  |         end | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-08-11 15:08:54 -03:00
										 |  |  |         def summarize_with_truncation(contents, opts, &on_partial_blk) | 
					
						
							| 
									
										
										
										
											2023-06-27 12:26:33 -03:00
										 |  |  |           instructions = build_base_prompt(opts) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |           text_to_summarize = contents.map { |c| format_content_item(c) }.join | 
					
						
							| 
									
										
										
										
											2023-07-13 17:05:41 -03:00
										 |  |  |           truncated_content = tokenizer.truncate(text_to_summarize, available_tokens) | 
					
						
							| 
									
										
										
										
											2023-06-27 12:26:33 -03:00
										 |  |  | 
 | 
					
						
							|  |  |  |           instructions += "<input>#{truncated_content}</input>\nAssistant:\n" | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-08-11 15:08:54 -03:00
										 |  |  |           completion(instructions, &on_partial_blk) | 
					
						
							| 
									
										
										
										
											2023-06-27 12:26:33 -03:00
										 |  |  |         end | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-08-11 15:08:54 -03:00
										 |  |  |         def summarize_single(chunk_text, opts, &on_partial_blk) | 
					
						
							|  |  |  |           summarize_chunk(chunk_text, opts.merge(single_chunk: true), &on_partial_blk) | 
					
						
							| 
									
										
										
										
											2023-07-13 17:05:41 -03:00
										 |  |  |         end | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-06-27 12:26:33 -03:00
										 |  |  |         private | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-08-11 15:08:54 -03:00
										 |  |  |         def summarize_chunk(chunk_text, opts, &on_partial_blk) | 
					
						
							|  |  |  |           completion( | 
					
						
							|  |  |  |             build_base_prompt(opts) + "<input>#{chunk_text}</input>\nAssistant:\n", | 
					
						
							|  |  |  |             &on_partial_blk | 
					
						
							|  |  |  |           ) | 
					
						
							| 
									
										
										
										
											2023-06-27 12:26:33 -03:00
										 |  |  |         end | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         def build_base_prompt(opts) | 
					
						
							| 
									
										
										
										
											2023-07-13 17:05:41 -03:00
										 |  |  |           initial_instruction = | 
					
						
							|  |  |  |             if opts[:single_chunk] | 
					
						
							|  |  |  |               "Summarize the following forum discussion inside the given <input> tag, creating a cohesive narrative." | 
					
						
							|  |  |  |             else | 
					
						
							|  |  |  |               "Summarize the following forum discussion inside the given <input> tag." | 
					
						
							|  |  |  |             end | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-06-27 12:26:33 -03:00
										 |  |  |           base_prompt = <<~TEXT | 
					
						
							| 
									
										
										
										
											2023-07-13 17:05:41 -03:00
										 |  |  |             Human: #{initial_instruction} | 
					
						
							| 
									
										
										
										
											2023-09-04 12:04:47 -03:00
										 |  |  |             Try to keep the summary in the same language as the forum discussion. | 
					
						
							| 
									
										
										
										
											2023-08-16 15:09:52 -03:00
										 |  |  |             Format the response, including links, using markdown. | 
					
						
							| 
									
										
										
										
											2023-06-27 12:26:33 -03:00
										 |  |  |           TEXT | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-09-04 12:04:47 -03:00
										 |  |  |           base_prompt += <<~TEXT if opts[:resource_path] | 
					
						
							|  |  |  |               Try generating links as well the format is #{opts[:resource_path]}/POST_ID | 
					
						
							|  |  |  |               For example, a link to the 3rd post in the topic would be [post 3](#{opts[:resource_path]}/3) | 
					
						
							|  |  |  |             TEXT | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |           base_prompt += "Wrap the whole the summary inside <ai> tags.\n" | 
					
						
							| 
									
										
										
										
											2023-06-27 12:26:33 -03:00
										 |  |  | 
 | 
					
						
							|  |  |  |           base_prompt += "The discussion title is: #{opts[:content_title]}.\n" if opts[ | 
					
						
							|  |  |  |             :content_title | 
					
						
							|  |  |  |           ] | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-07-13 17:05:41 -03:00
										 |  |  |           base_prompt += "Don't use more than 400 words.\n" unless opts[:single_chunk] | 
					
						
							| 
									
										
										
										
											2023-07-13 21:48:25 -03:00
										 |  |  | 
 | 
					
						
							|  |  |  |           base_prompt | 
					
						
							| 
									
										
										
										
											2023-06-27 12:26:33 -03:00
										 |  |  |         end | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-08-11 15:08:54 -03:00
										 |  |  |         def completion(prompt, &on_partial_blk) | 
					
						
							|  |  |  |           # We need to discard any text that might come before the <ai> tag. | 
					
						
							|  |  |  |           # Instructing the model to reply only with the summary seems impossible. | 
					
						
							|  |  |  |           pre_tag_partial = +"" | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |           if on_partial_blk | 
					
						
							|  |  |  |             on_partial_read = | 
					
						
							|  |  |  |               Proc.new do |partial| | 
					
						
							|  |  |  |                 if pre_tag_partial.include?("<ai>") | 
					
						
							|  |  |  |                   on_partial_blk.call(partial[:completion]) | 
					
						
							|  |  |  |                 else | 
					
						
							|  |  |  |                   pre_tag_partial << partial[:completion] | 
					
						
							|  |  |  |                 end | 
					
						
							|  |  |  |               end | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |             response = | 
					
						
							|  |  |  |               ::DiscourseAi::Inference::AnthropicCompletions.perform!( | 
					
						
							|  |  |  |                 prompt, | 
					
						
							|  |  |  |                 model, | 
					
						
							|  |  |  |                 &on_partial_read | 
					
						
							|  |  |  |               ) | 
					
						
							|  |  |  |           else | 
					
						
							|  |  |  |             response = | 
					
						
							|  |  |  |               ::DiscourseAi::Inference::AnthropicCompletions.perform!(prompt, model).dig( | 
					
						
							|  |  |  |                 :completion, | 
					
						
							|  |  |  |               ) | 
					
						
							|  |  |  |           end | 
					
						
							| 
									
										
										
										
											2023-06-27 12:26:33 -03:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-10-03 15:39:30 -03:00
										 |  |  |           Nokogiri::HTML5.fragment(response).at("ai").text.presence || response | 
					
						
							| 
									
										
										
										
											2023-06-27 12:26:33 -03:00
										 |  |  |         end | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         def tokenizer | 
					
						
							|  |  |  |           DiscourseAi::Tokenizer::AnthropicTokenizer | 
					
						
							|  |  |  |         end | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         attr_reader :max_tokens | 
					
						
							|  |  |  |       end | 
					
						
							|  |  |  |     end | 
					
						
							|  |  |  |   end | 
					
						
							|  |  |  | end |