FEATURE: GPT4o Tokenizer (#721)
parent 7f2c3a1ab9
commit 3502f0f1cd
@@ -0,0 +1,13 @@
+# frozen_string_literal: true
+
+module DiscourseAi
+  module Tokenizer
+    class OpenAiGpt4oTokenizer < OpenAiTokenizer
+      class << self
+        def tokenizer
+          @@tokenizer ||= Tiktoken.get_encoding("o200k_base")
+        end
+      end
+    end
+  end
+end
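For context, a minimal usage sketch (not part of the diff): the new class is used through the same class-method interface that the spec added later in this commit exercises, with counting backed by the o200k_base encoding.

# Usage sketch only, assuming the discourse-ai plugin is loaded.
# `.size` is the class method the new spec below calls.
DiscourseAi::Tokenizer::OpenAiGpt4oTokenizer.size("Hello, World! 123")
# => 6, per the spec added below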
@@ -9,7 +9,7 @@
 # required_version: 2.7.0
 
 gem "tokenizers", "0.4.4"
-gem "tiktoken_ruby", "0.0.7"
+gem "tiktoken_ruby", "0.0.9"
 
 enabled_site_setting :discourse_ai_enabled
 
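The tiktoken_ruby bump is presumably what makes the o200k_base encoding referenced by the new tokenizer available; a quick check, assuming the bumped gem is installed:

# Sketch: confirm the encoding used by the new tokenizer class resolves.
require "tiktoken_ruby"
Tiktoken.get_encoding("o200k_base") # returns an encoding object when the gem supports it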
@@ -109,6 +109,16 @@ describe DiscourseAi::Tokenizer::OpenAiTokenizer do
   end
 end
 
+describe DiscourseAi::Tokenizer::OpenAiGpt4oTokenizer do
+  describe "#size" do
+    describe "returns a token count" do
+      it "for a sentence with punctuation and capitalization and numbers" do
+        expect(described_class.size("Hello, World! 123")).to eq(6)
+      end
+    end
+  end
+end
+
 describe DiscourseAi::Tokenizer::AllMpnetBaseV2Tokenizer do
   describe "#size" do
     describe "returns a token count" do
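The asserted count of 6 can also be reproduced against tiktoken_ruby directly, outside the plugin (a sketch, assuming the gem version from the bump above):

# Sketch: encode the spec's sample string with the raw encoding and count tokens.
require "tiktoken_ruby"
encoding = Tiktoken.get_encoding("o200k_base")
encoding.encode("Hello, World! 123").length # expected to equal 6, matching the spec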