diff --git a/dotnet/packages/Microsoft.TeamsAI/Microsoft.TeamsAI.Tests/UtilitiesTests/TokenizerTests.cs b/dotnet/packages/Microsoft.TeamsAI/Microsoft.TeamsAI.Tests/UtilitiesTests/TokenizerTests.cs new file mode 100644 index 000000000..4e417c13a --- /dev/null +++ b/dotnet/packages/Microsoft.TeamsAI/Microsoft.TeamsAI.Tests/UtilitiesTests/TokenizerTests.cs @@ -0,0 +1,26 @@ +using Microsoft.Teams.AI.AI.Tokenizers; + +namespace Microsoft.Teams.AI.Tests.UtilitiesTests +{ + public class TokenizerTests + { + public static IEnumerable TokenizersObjects() + { + yield return new object[] { new GPTTokenizer() }; + yield return new object[] { new GPTTokenizer("gpt-4") }; + } + + + [Theory] + [MemberData(nameof(TokenizersObjects))] + public void ValidateResults(ITokenizer tokenizer) + { + string text = "Hello, World"; + Assert.NotNull(tokenizer); + IReadOnlyList tokens = tokenizer.Encode(text); + + Assert.Equal(new int[] { 9906, 11, 4435 }, tokens); + Assert.Equal(text, tokenizer.Decode(tokens)); + } + } +} diff --git a/dotnet/packages/Microsoft.TeamsAI/Microsoft.TeamsAI/AI/DataSources/TextDataSource.cs b/dotnet/packages/Microsoft.TeamsAI/Microsoft.TeamsAI/AI/DataSources/TextDataSource.cs index 84f5a9827..0709d882e 100644 --- a/dotnet/packages/Microsoft.TeamsAI/Microsoft.TeamsAI/AI/DataSources/TextDataSource.cs +++ b/dotnet/packages/Microsoft.TeamsAI/Microsoft.TeamsAI/AI/DataSources/TextDataSource.cs @@ -16,7 +16,7 @@ public class TextDataSource : IDataSource public string Name { get; } private readonly string _text; - private List _tokens = new(); + private IReadOnlyList _tokens = new List(); /// /// Creates instance of `TextDataSource` diff --git a/dotnet/packages/Microsoft.TeamsAI/Microsoft.TeamsAI/AI/Prompts/Sections/ActionAugmentationSection.cs b/dotnet/packages/Microsoft.TeamsAI/Microsoft.TeamsAI/AI/Prompts/Sections/ActionAugmentationSection.cs index aceee66f1..64274532a 100644 --- a/dotnet/packages/Microsoft.TeamsAI/Microsoft.TeamsAI/AI/Prompts/Sections/ActionAugmentationSection.cs +++ b/dotnet/packages/Microsoft.TeamsAI/Microsoft.TeamsAI/AI/Prompts/Sections/ActionAugmentationSection.cs @@ -18,7 +18,7 @@ public class ActionAugmentationSection : PromptSection public readonly Dictionary Actions; private readonly string _text; - private List? _tokens; + private IReadOnlyList? _tokens; private class ActionMap { @@ -64,7 +64,7 @@ public override async Task>> RenderAsMes this._tokens = tokenizer.Encode(this._text); } - List tokens = this._tokens; + IReadOnlyList tokens = this._tokens; bool tooLong = false; if (this._tokens.Count > maxTokens) diff --git a/dotnet/packages/Microsoft.TeamsAI/Microsoft.TeamsAI/AI/Prompts/Sections/GroupSection.cs b/dotnet/packages/Microsoft.TeamsAI/Microsoft.TeamsAI/AI/Prompts/Sections/GroupSection.cs index 07d85d7bd..a94285f3b 100644 --- a/dotnet/packages/Microsoft.TeamsAI/Microsoft.TeamsAI/AI/Prompts/Sections/GroupSection.cs +++ b/dotnet/packages/Microsoft.TeamsAI/Microsoft.TeamsAI/AI/Prompts/Sections/GroupSection.cs @@ -59,7 +59,7 @@ public override async Task> RenderAsTextAsync(ITur // truncate if (this.Tokens > 1 && length > this.Tokens) { - List encoded = tokenizer.Encode(text); + IReadOnlyList encoded = tokenizer.Encode(text); text = tokenizer.Decode(encoded.Take(this.Tokens).ToList()); length = this.Tokens; } diff --git a/dotnet/packages/Microsoft.TeamsAI/Microsoft.TeamsAI/AI/Prompts/Sections/PromptSection.cs b/dotnet/packages/Microsoft.TeamsAI/Microsoft.TeamsAI/AI/Prompts/Sections/PromptSection.cs index ecd9c5452..438010b5d 100644 --- a/dotnet/packages/Microsoft.TeamsAI/Microsoft.TeamsAI/AI/Prompts/Sections/PromptSection.cs +++ b/dotnet/packages/Microsoft.TeamsAI/Microsoft.TeamsAI/AI/Prompts/Sections/PromptSection.cs @@ -124,7 +124,7 @@ public virtual async Task> RenderAsTextAsync(ITurn // truncate if (this.Tokens > 1 && length > this.Tokens) { - List encoded = tokenizer.Encode(text); + IReadOnlyList encoded = tokenizer.Encode(text); text = tokenizer.Decode(encoded.Take(this.Tokens).ToList()); length = this.Tokens; } @@ -148,7 +148,7 @@ protected RenderedPromptSection> TruncateMessages(List encoded = tokenizer.Encode(text); + IReadOnlyList encoded = tokenizer.Encode(text); if (len + encoded.Count > budget) { diff --git a/dotnet/packages/Microsoft.TeamsAI/Microsoft.TeamsAI/AI/Tokenizers/GPTTokenizer.cs b/dotnet/packages/Microsoft.TeamsAI/Microsoft.TeamsAI/AI/Tokenizers/GPTTokenizer.cs index 9f8eae27a..f24a07993 100644 --- a/dotnet/packages/Microsoft.TeamsAI/Microsoft.TeamsAI/AI/Tokenizers/GPTTokenizer.cs +++ b/dotnet/packages/Microsoft.TeamsAI/Microsoft.TeamsAI/AI/Tokenizers/GPTTokenizer.cs @@ -1,4 +1,4 @@ -using SharpToken; +using Microsoft.ML.Tokenizers; namespace Microsoft.Teams.AI.AI.Tokenizers { @@ -7,52 +7,37 @@ namespace Microsoft.Teams.AI.AI.Tokenizers /// public class GPTTokenizer : ITokenizer { - private readonly GptEncoding _encoding; + private readonly Tokenizer _encoding; /// - /// Creates an instance of `GPTTokenizer` using the `cl100k_base` encoding by default + /// Creates an instance of `GPTTokenizer` using "gpt-4" model name by default which is using the `cl100k_base` encoding /// - public GPTTokenizer() - { - this._encoding = GptEncoding.GetEncoding("cl100k_base"); - } + public GPTTokenizer() => _encoding = Tokenizer.CreateTiktokenForModel("gpt-4"); /// /// Creates an instance of `GPTTokenizer` /// /// encoding to use - public GPTTokenizer(GptEncoding encoding) - { - this._encoding = encoding; - } + public GPTTokenizer(Tokenizer encoding) => this._encoding = encoding; /// /// Creates an instance of `GPTTokenizer` /// /// model to encode/decode for - public GPTTokenizer(string model) - { - this._encoding = GptEncoding.GetEncodingForModel(model); - } + public GPTTokenizer(string model) => this._encoding = Tokenizer.CreateTiktokenForModel(model); /// /// Encode /// /// text to encode /// encoded tokens - public List Encode(string text) - { - return this._encoding.Encode(text); - } + public IReadOnlyList Encode(string text) => this._encoding.EncodeToIds(text); /// /// Decode /// /// tokens to decode /// decoded text - public string Decode(List tokens) - { - return this._encoding.Decode(tokens); - } + public string Decode(IEnumerable tokens) => this._encoding.Decode(tokens)!; } } diff --git a/dotnet/packages/Microsoft.TeamsAI/Microsoft.TeamsAI/AI/Tokenizers/ITokenizer.cs b/dotnet/packages/Microsoft.TeamsAI/Microsoft.TeamsAI/AI/Tokenizers/ITokenizer.cs index abd29985b..a7a230cb6 100644 --- a/dotnet/packages/Microsoft.TeamsAI/Microsoft.TeamsAI/AI/Tokenizers/ITokenizer.cs +++ b/dotnet/packages/Microsoft.TeamsAI/Microsoft.TeamsAI/AI/Tokenizers/ITokenizer.cs @@ -10,13 +10,13 @@ public interface ITokenizer /// /// text to encode /// encoded bytes - public List Encode(string text); + public IReadOnlyList Encode(string text); /// /// Decode /// /// tokens to decode /// decoded string - public string Decode(List tokens); + public string Decode(IEnumerable tokens); } } diff --git a/dotnet/packages/Microsoft.TeamsAI/Microsoft.TeamsAI/Application/Authentication/OAuthAuthentication.cs b/dotnet/packages/Microsoft.TeamsAI/Microsoft.TeamsAI/Application/Authentication/OAuthAuthentication.cs index 732101aac..70a48e087 100644 --- a/dotnet/packages/Microsoft.TeamsAI/Microsoft.TeamsAI/Application/Authentication/OAuthAuthentication.cs +++ b/dotnet/packages/Microsoft.TeamsAI/Microsoft.TeamsAI/Application/Authentication/OAuthAuthentication.cs @@ -3,7 +3,6 @@ using Microsoft.Teams.AI.Exceptions; using Microsoft.Teams.AI.State; using System.Runtime.CompilerServices; -using System.Threading; [assembly: InternalsVisibleTo("Microsoft.Teams.AI.Tests")] namespace Microsoft.Teams.AI @@ -133,6 +132,9 @@ public async Task SignOutUserAsync(ITurnContext context, TState state, Cancellat await UserTokenClientWrapper.SignoutUserAsync(context, _settings.ConnectionName, cancellationToken); } + /// + /// Get user token + /// protected virtual async Task GetUserToken(ITurnContext context, string connectionName, CancellationToken cancellationToken = default) { return await UserTokenClientWrapper.GetUserTokenAsync(context, connectionName, "", cancellationToken); diff --git a/dotnet/packages/Microsoft.TeamsAI/Microsoft.TeamsAI/Microsoft.Teams.AI.csproj b/dotnet/packages/Microsoft.TeamsAI/Microsoft.TeamsAI/Microsoft.Teams.AI.csproj index 90551ddc2..8f5ec700a 100644 --- a/dotnet/packages/Microsoft.TeamsAI/Microsoft.TeamsAI/Microsoft.Teams.AI.csproj +++ b/dotnet/packages/Microsoft.TeamsAI/Microsoft.TeamsAI/Microsoft.Teams.AI.csproj @@ -45,7 +45,7 @@ - + diff --git a/dotnet/packages/Microsoft.TeamsAI/nuget.config b/dotnet/packages/Microsoft.TeamsAI/nuget.config new file mode 100644 index 000000000..45a49fc03 --- /dev/null +++ b/dotnet/packages/Microsoft.TeamsAI/nuget.config @@ -0,0 +1,7 @@ + + + + + + +