# frozen_string_literal: true

# Spec for the Truncation embeddings strategy: checks that text prepared from
# a topic fits within the token limit reported by the vector representation.
RSpec.describe DiscourseAi::Embeddings::Strategies::Truncation do
  subject(:truncation) { described_class.new }

  describe "#prepare_text_from" do
    context "when using vector from OpenAI" do
      # NOTE(review): presumably raised so the long fabricated posts below pass
      # post-length validation -- confirm against the default site setting.
      before { SiteSetting.max_post_length = 100_000 }

      fab!(:topic) { Fabricate(:topic) }

      # Three long posts attached to the same topic. Each one gets a distinct
      # name: declaring several fabricators under one shared name would make
      # every later declaration redefine the accessor of the earlier ones.
      fab!(:post_1) do
        Fabricate(:post, topic: topic, raw: "Baby, bird, bird, bird\nBird is the word\n" * 500)
      end
      fab!(:post_2) do
        Fabricate(
          :post,
          topic: topic,
          raw: "Don't you know about the bird?\nEverybody knows that the bird is a word\n" * 400,
        )
      end
      fab!(:post_3) { Fabricate(:post, topic: topic, raw: "Surfin' bird\n" * 800) }

      # Vector representation built on top of the strategy under test; the
      # example reads its tokenizer and max sequence length.
      let(:model) do
        DiscourseAi::Embeddings::VectorRepresentations::TextEmbeddingAda002.new(truncation)
      end

      it "truncates a topic" do
        prepared_text =
          truncation.prepare_text_from(topic, model.tokenizer, model.max_sequence_length)

        # The prepared text must not exceed the model's token budget.
        expect(model.tokenizer.size(prepared_text)).to be <= model.max_sequence_length
      end
    end
  end
end