Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

MessageHandlerHelper 中添加使用 Unicode 拆分字符的方法 #177

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 30 additions & 0 deletions src/Senparc.NeuChar.Tests/Helpers/MessageHandlerHelperTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -36,5 +36,35 @@ public void SubstringByByteTest()


}

/// <summary>
/// Verifies that <c>MessageHandlerHelper.ChunkStringByUnicode</c> produces chunks that
/// each fit within the UTF-8 byte limit, are never empty, and together reproduce the
/// original text without loss or reordering.
/// </summary>
[TestMethod]
public void ChunkStringByUnicode()
{
    var limit = 10;
    var text = "Senparc.NeuChar🤝 跨平台信息交互🔄标准🌍。使用 NeuChar 标准💬可以跨平台🌐兼容不同平台的交互🔀信息设置,一次设置🚀,多平台共享📱🖥。";

    var results = MessageHandlerHelper.ChunkStringByUnicode(text, limit);

    // Rebuild the text while checking every chunk, so the lazy sequence is enumerated once.
    var rebuilt = new StringBuilder();
    foreach (var result in results)
    {
        Assert.IsFalse(string.IsNullOrEmpty(result));
        Assert.IsTrue(Encoding.UTF8.GetByteCount(result) <= limit);
        rebuilt.Append(result);
    }

    // Splitting must be lossless: the chunks joined in order equal the input.
    Assert.AreEqual(text, rebuilt.ToString());
}

/// <summary>
/// Runs <c>MessageHandlerHelper.TryHandleLimitedText</c> with a handler that echoes each
/// chunk back, and checks that the echoed chunks concatenated in order equal the input.
/// </summary>
[TestMethod]
public async Task HandleLimitedTextAsync()
{
    const int maxBytes = 10;
    var source = "Senparc.NeuChar🤝 跨平台信息交互🔄标准🌍。使用 NeuChar 标准💬可以跨平台🌐兼容不同平台的交互🔀信息设置,一次设置🚀,多平台共享📱🖥。";

    // Identity handler: the result of processing a chunk is the chunk itself.
    var echoed = await MessageHandlerHelper.TryHandleLimitedText(source, maxBytes, piece => Task.FromResult(piece));

    var rejoined = string.Concat(echoed);
    Assert.AreEqual(source, rejoined);
}

}
}
52 changes: 52 additions & 0 deletions src/Senparc.NeuChar/Helpers/MessageHandlerHelper.cs
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ and limitations under the License.

using System;
using System.Collections.Generic;
using System.Globalization;
using System.Linq;
using System.Security.Principal;
using System.Text;
Expand Down Expand Up @@ -219,5 +220,56 @@ public static async Task<T> TrySendLimistedText<T>(string accessTokenOrAppId, st

return null;//不做处理
}

/// <summary>
/// Splits over-long text into chunks with <see cref="ChunkStringByUnicode"/> and runs
/// <paramref name="handleTextFuncAsync"/> on each chunk sequentially, in order,
/// collecting the results.
/// </summary>
/// <typeparam name="T">Type of the result produced for each chunk.</typeparam>
/// <param name="content">Text content to process. <c>null</c> or empty means there is nothing to handle.</param>
/// <param name="limitedBytes">Byte limit passed to the chunker for each piece of text. Values &lt;= 0 disable processing.</param>
/// <param name="handleTextFuncAsync">Asynchronous handler invoked once per non-empty chunk.</param>
/// <returns>
/// The handler results in chunk order; an empty collection when <paramref name="limitedBytes"/>
/// is not positive or <paramref name="content"/> is <c>null</c> or empty.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="handleTextFuncAsync"/> is <c>null</c>.</exception>
public static async Task<IEnumerable<T>> TryHandleLimitedText<T>(string content, int limitedBytes, Func<string, Task<T>> handleTextFuncAsync)
    where T : class
{
    // Fail with a descriptive exception instead of a NullReferenceException inside the loop.
    if (handleTextFuncAsync == null)
    {
        throw new ArgumentNullException(nameof(handleTextFuncAsync));
    }

    List<T> results = new();

    // Nothing to do: no limit configured, or no text to split.
    if (limitedBytes <= 0 || string.IsNullOrEmpty(content))
    {
        return results;
    }

    foreach (var chunk in ChunkStringByUnicode(content, limitedBytes))
    {
        // Never hand an empty chunk to the handler; it carries no content to process.
        if (chunk.Length == 0)
        {
            continue;
        }

        results.Add(await handleTextFuncAsync(chunk));
    }

    return results;
}

/// <summary>
/// Splits text into consecutive chunks whose UTF-8 encoded size does not exceed
/// <paramref name="chunkSize"/> bytes, without breaking a text element (as reported by
/// <see cref="StringInfo.GetTextElementEnumerator(string)"/>, e.g. a surrogate pair).
/// </summary>
/// <param name="text">Text to split. <c>null</c> or empty produces no chunks.</param>
/// <param name="chunkSize">Maximum UTF-8 byte size of a chunk.</param>
/// <returns>
/// A lazily evaluated sequence of non-empty chunks which, concatenated in order, reproduce
/// <paramref name="text"/>. A single text element that is larger than
/// <paramref name="chunkSize"/> is returned as a chunk of its own; this is the only case
/// in which a chunk exceeds the limit.
/// </returns>
public static IEnumerable<string> ChunkStringByUnicode(string text, int chunkSize)
{
    if (string.IsNullOrEmpty(text))
    {
        yield break;
    }

    var stringBuilder = new StringBuilder();
    var byteSize = 0;
    TextElementEnumerator enumerator = StringInfo.GetTextElementEnumerator(text);

    while (enumerator.MoveNext())
    {
        string textElement = enumerator.GetTextElement();
        // Only the size is needed, so avoid allocating the encoded byte array.
        int elementBytes = Encoding.UTF8.GetByteCount(textElement);

        // Flush only when the element would push the chunk past the limit (a chunk of
        // exactly chunkSize bytes is allowed), and never emit an empty chunk.
        if (byteSize + elementBytes > chunkSize && stringBuilder.Length > 0)
        {
            yield return stringBuilder.ToString();
            stringBuilder.Clear();
            byteSize = 0;
        }

        byteSize += elementBytes;
        stringBuilder.Append(textElement);
    }

    if (stringBuilder.Length > 0)
    {
        yield return stringBuilder.ToString();
    }
}
}
}