Skip to content

Commit

Permalink
Merge pull request #973 from iorisa/fixbug/gbk
Browse files Browse the repository at this point in the history
fixbug: gbk UnicodeEncodeError
  • Loading branch information
geekan authored Mar 8, 2024
2 parents fe9dae1 + 9d325d4 commit 283c7b4
Show file tree
Hide file tree
Showing 10 changed files with 43 additions and 42 deletions.
5 changes: 2 additions & 3 deletions metagpt/learn/skill_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,11 @@
from pathlib import Path
from typing import Dict, List, Optional

import aiofiles
import yaml
from pydantic import BaseModel, Field

from metagpt.context import Context
from metagpt.utils.common import aread


class Example(BaseModel):
Expand Down Expand Up @@ -68,8 +68,7 @@ class SkillsDeclaration(BaseModel):
async def load(skill_yaml_file_name: Path = None) -> "SkillsDeclaration":
if not skill_yaml_file_name:
skill_yaml_file_name = Path(__file__).parent.parent.parent / "docs/.well-known/skills.yaml"
async with aiofiles.open(str(skill_yaml_file_name), mode="r") as reader:
data = await reader.read(-1)
data = await aread(filename=skill_yaml_file_name)
skill_data = yaml.safe_load(data)
return SkillsDeclaration(**skill_data)

Expand Down
16 changes: 12 additions & 4 deletions metagpt/utils/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
from typing import Any, Callable, List, Tuple, Union

import aiofiles
import chardet
import loguru
import requests
from PIL import Image
Expand Down Expand Up @@ -587,14 +588,21 @@ async def wrapper(self, *args, **kwargs):


@handle_exception
async def aread(filename: str | Path, encoding="utf-8") -> str:
    """Read a text file asynchronously and return its content.

    The file is first decoded with ``encoding``. If that raises
    ``UnicodeDecodeError``, the raw bytes are read instead and decoded with
    the encoding guessed by ``chardet``.

    Args:
        filename: Path of the file to read.
        encoding: Encoding tried first; defaults to "utf-8".

    Returns:
        The decoded file content.
    """
    try:
        async with aiofiles.open(str(filename), mode="r", encoding=encoding) as reader:
            content = await reader.read()
    except UnicodeDecodeError:
        async with aiofiles.open(str(filename), mode="rb") as reader:
            raw = await reader.read()
        detected_encoding = chardet.detect(raw)["encoding"]
        if detected_encoding:
            content = raw.decode(detected_encoding)
        else:
            # chardet reports None when it cannot guess an encoding; passing
            # None to bytes.decode() would raise TypeError, so fall back to a
            # lossy decode with the requested encoding instead.
            content = raw.decode(encoding or "utf-8", errors="replace")
    return content


async def awrite(filename: str | Path, data: str, encoding=None):
async def awrite(filename: str | Path, data: str, encoding="utf-8"):
"""Write file asynchronously."""
pathname = Path(filename)
pathname.parent.mkdir(parents=True, exist_ok=True)
Expand Down
7 changes: 2 additions & 5 deletions metagpt/utils/dependency_file.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,7 @@
from pathlib import Path
from typing import Set

import aiofiles

from metagpt.utils.common import aread
from metagpt.utils.common import aread, awrite
from metagpt.utils.exceptions import handle_exception


Expand Down Expand Up @@ -45,8 +43,7 @@ async def load(self):
async def save(self):
"""Save dependencies to the file asynchronously."""
data = json.dumps(self._dependencies)
async with aiofiles.open(str(self._filename), mode="w") as writer:
await writer.write(data)
await awrite(filename=self._filename, data=data)

async def update(self, filename: Path | str, dependencies: Set[Path | str], persist=True):
"""Update dependencies for a file asynchronously.
Expand Down
7 changes: 2 additions & 5 deletions metagpt/utils/file_repository.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,9 @@
from pathlib import Path
from typing import Dict, List, Set

import aiofiles

from metagpt.logs import logger
from metagpt.schema import Document
from metagpt.utils.common import aread
from metagpt.utils.common import aread, awrite
from metagpt.utils.json_to_markdown import json_to_markdown


Expand Down Expand Up @@ -55,8 +53,7 @@ async def save(self, filename: Path | str, content, dependencies: List[str] = No
pathname = self.workdir / filename
pathname.parent.mkdir(parents=True, exist_ok=True)
content = content if content else "" # avoid `argument must be str, not None` to make it continue
async with aiofiles.open(str(pathname), mode="w") as writer:
await writer.write(content)
await awrite(filename=str(pathname), data=content)
logger.info(f"save to: {str(pathname)}")

if dependencies is not None:
Expand Down
8 changes: 2 additions & 6 deletions metagpt/utils/mermaid.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,9 @@
import os
from pathlib import Path

import aiofiles

from metagpt.config2 import config
from metagpt.logs import logger
from metagpt.utils.common import check_cmd_exists
from metagpt.utils.common import awrite, check_cmd_exists


async def mermaid_to_file(engine, mermaid_code, output_file_without_suffix, width=2048, height=2048) -> int:
Expand All @@ -30,9 +28,7 @@ async def mermaid_to_file(engine, mermaid_code, output_file_without_suffix, widt
if dir_name and not os.path.exists(dir_name):
os.makedirs(dir_name)
tmp = Path(f"{output_file_without_suffix}.mmd")
async with aiofiles.open(tmp, "w", encoding="utf-8") as f:
await f.write(mermaid_code)
# tmp.write_text(mermaid_code, encoding="utf-8")
await awrite(filename=tmp, data=mermaid_code)

if engine == "nodejs":
if check_cmd_exists(config.mermaid.path) != 0:
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ def run(self):

setup(
name="metagpt",
version="0.7.5",
version="0.7.6",
description="The Multi-Agent Framework",
long_description=long_description,
long_description_content_type="text/markdown",
Expand Down
7 changes: 3 additions & 4 deletions tests/metagpt/roles/test_tutorial_assistant.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,11 @@
@File : test_tutorial_assistant.py
"""

import aiofiles
import pytest

from metagpt.const import TUTORIAL_PATH
from metagpt.roles.tutorial_assistant import TutorialAssistant
from metagpt.utils.common import aread


@pytest.mark.asyncio
Expand All @@ -20,9 +20,8 @@ async def test_tutorial_assistant(language: str, topic: str, context):
msg = await role.run(topic)
assert TUTORIAL_PATH.exists()
filename = msg.content
async with aiofiles.open(filename, mode="r", encoding="utf-8") as reader:
content = await reader.read()
assert "pip" in content
content = await aread(filename=filename)
assert "pip" in content


if __name__ == "__main__":
Expand Down
20 changes: 15 additions & 5 deletions tests/metagpt/utils/test_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@
from pathlib import Path
from typing import Any, Set

import aiofiles
import pytest
from pydantic import BaseModel

Expand Down Expand Up @@ -125,9 +124,7 @@ class Input(BaseModel):
async def test_parse_data_exception(self, filename, want):
pathname = Path(__file__).parent.parent.parent / "data/output_parser" / filename
assert pathname.exists()
async with aiofiles.open(str(pathname), mode="r") as reader:
data = await reader.read()

data = await aread(filename=pathname)
result = OutputParser.parse_data(data=data)
assert want in result

Expand Down Expand Up @@ -198,12 +195,25 @@ async def test_read_file_block(self):

@pytest.mark.asyncio
async def test_read_write(self):
    """Round-trip a short ASCII string through ``awrite`` and ``aread``."""
    # A fresh uuid directory under workspace/unittest keeps runs from colliding.
    pathname = Path(__file__).parent / f"../../../workspace/unittest/{uuid.uuid4().hex}" / "test.tmp"
    try:
        await awrite(pathname, "ABC")
        data = await aread(pathname)
        assert data == "ABC"
    finally:
        # Remove the temp file even when an assertion above fails.
        pathname.unlink(missing_ok=True)

@pytest.mark.asyncio
async def test_read_write_error_charset(self):
    """Check that ``aread`` returns the written text for matching and mismatched encodings.

    First a string with CJK and a non-BMP-free symbol is written and read with
    the default encodings. Then text is written as gb2312 but read with
    ``encoding="utf-8"``; the read is expected to still return the original
    text.
    """
    pathname = Path(__file__).parent / f"../../../workspace/unittest/{uuid.uuid4().hex}" / "test.txt"
    try:
        content = "中国abc123\u27f6"
        await awrite(filename=pathname, data=content)
        data = await aread(filename=pathname)
        assert data == content

        content = "GB18030 是中国国家标准局发布的新一代中文字符集标准,是 GBK 的升级版,支持更广泛的字符范围。"
        await awrite(filename=pathname, data=content, encoding="gb2312")
        data = await aread(filename=pathname, encoding="utf-8")
        assert data == content
    finally:
        # Clean up the temp file, matching test_read_write, even on failure.
        pathname.unlink(missing_ok=True)


if __name__ == "__main__":
pytest.main([__file__, "-s"])
5 changes: 2 additions & 3 deletions tests/metagpt/utils/test_git_repository.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,15 +10,14 @@
import shutil
from pathlib import Path

import aiofiles
import pytest

from metagpt.utils.common import awrite
from metagpt.utils.git_repository import GitRepository


async def mock_file(filename, content=""):
    """Write ``content`` to ``filename`` using ``awrite``.

    Args:
        filename: Path of the file to write.
        content: Text to write; defaults to an empty string.
    """
    await awrite(filename=filename, data=content)


async def mock_repo(local_path) -> (GitRepository, Path):
Expand Down
8 changes: 2 additions & 6 deletions tests/metagpt/utils/test_s3.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
from pathlib import Path

import aioboto3
import aiofiles
import pytest

from metagpt.config2 import Config
Expand Down Expand Up @@ -37,16 +36,15 @@ async def test_s3(mocker):
conn = S3(s3)
object_name = "unittest.bak"
await conn.upload_file(bucket=s3.bucket, local_path=__file__, object_name=object_name)
pathname = (Path(__file__).parent / uuid.uuid4().hex).with_suffix(".bak")
pathname = (Path(__file__).parent / "../../../workspace/unittest" / uuid.uuid4().hex).with_suffix(".bak")
pathname.unlink(missing_ok=True)
await conn.download_file(bucket=s3.bucket, object_name=object_name, local_path=str(pathname))
assert pathname.exists()
url = await conn.get_object_url(bucket=s3.bucket, object_name=object_name)
assert url
bin_data = await conn.get_object(bucket=s3.bucket, object_name=object_name)
assert bin_data
async with aiofiles.open(__file__, mode="r", encoding="utf-8") as reader:
data = await reader.read()
data = await aread(filename=__file__)
res = await conn.cache(data, ".bak", "script")
assert "http" in res

Expand All @@ -60,8 +58,6 @@ async def test_s3(mocker):
except Exception:
pass

await reader.close()


if __name__ == "__main__":
pytest.main([__file__, "-s"])

0 comments on commit 283c7b4

Please sign in to comment.