Skip to content

Commit d86fc7e

Browse files
authored
enhance: use bytes for dataset element contents (#64)
Signed-off-by: Grant Linville <[email protected]>
1 parent b50cbe6 commit d86fc7e

File tree

3 files changed

+23
-11
lines changed

3 files changed

+23
-11
lines changed

gptscript/datasets.py

+14-2
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
1+
import base64
12
from typing import Dict
2-
from pydantic import BaseModel
3+
from pydantic import BaseModel, field_serializer, field_validator, BeforeValidator
4+
35

46
class DatasetElementMeta(BaseModel):
57
name: str
@@ -9,7 +11,17 @@ class DatasetElementMeta(BaseModel):
911
class DatasetElement(BaseModel):
    """A named dataset element whose payload is held as raw bytes.

    ``contents`` is ``bytes`` on the Python side. When the model is
    serialized the bytes are emitted as a base64 string, and when a ``str``
    is supplied for ``contents`` during validation it is base64-decoded
    back into bytes.
    """

    name: str
    description: str
    contents: bytes

    @field_serializer("contents")
    def serialize_contents(self, value: bytes) -> str:
        """Return ``contents`` encoded as a base64 text string."""
        return base64.b64encode(value).decode("utf-8")

    @field_validator("contents", mode="before")
    @classmethod
    def deserialize_contents(cls, value) -> bytes:
        """Decode a base64 ``str`` into bytes before pydantic validates it.

        Any value that is not a ``str`` (for example ``bytes`` passed
        directly to the constructor) is returned unchanged and left to
        pydantic's own ``bytes`` validation.
        """
        if isinstance(value, str):
            # NOTE(review): b64decode without validate=True drops characters
            # outside the base64 alphabet instead of rejecting them; confirm
            # whether strict decoding is wanted for this field.
            return base64.b64decode(value)
        return value
1325

1426

1527
class DatasetMeta(BaseModel):

gptscript/gptscript.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -242,7 +242,7 @@ async def create_dataset(self, workspace_id: str, name: str, description: str =
242242
)
243243
return Dataset.model_validate_json(res)
244244

245-
async def add_dataset_element(self, workspace_id: str, datasetID: str, elementName: str, elementContent: str,
245+
async def add_dataset_element(self, workspace_id: str, datasetID: str, elementName: str, elementContent: bytes,
246246
elementDescription: str = "") -> DatasetElementMeta:
247247
if workspace_id == "":
248248
workspace_id = os.environ["GPTSCRIPT_WORKSPACE_ID"]
@@ -251,7 +251,7 @@ async def add_dataset_element(self, workspace_id: str, datasetID: str, elementNa
251251
raise ValueError("datasetID cannot be empty")
252252
elif elementName == "":
253253
raise ValueError("elementName cannot be empty")
254-
elif elementContent == "":
254+
elif not elementContent:
255255
raise ValueError("elementContent cannot be empty")
256256

257257
res = await self._run_basic_command(
@@ -260,7 +260,7 @@ async def add_dataset_element(self, workspace_id: str, datasetID: str, elementNa
260260
"input": json.dumps({
261261
"datasetID": datasetID,
262262
"elementName": elementName,
263-
"elementContent": elementContent,
263+
"elementContent": base64.b64encode(elementContent).decode("utf-8"),
264264
"elementDescription": elementDescription,
265265
}),
266266
"workspaceID": workspace_id,

tests/test_gptscript.py

+6-6
Original file line numberDiff line numberDiff line change
@@ -771,29 +771,29 @@ async def test_datasets(gptscript):
771771
assert len(dataset.elements) == 0, "Expected dataset elements to be empty"
772772

773773
# Add an element
774-
element_meta = await gptscript.add_dataset_element(workspace_id, dataset.id, "element1", "element1 contents",
774+
element_meta = await gptscript.add_dataset_element(workspace_id, dataset.id, "element1", b"element1 contents",
775775
"element1 description")
776776
assert element_meta.name == "element1", "Expected element name to match"
777777
assert element_meta.description == "element1 description", "Expected element description to match"
778778

779779
# Add two more elements
780780
await gptscript.add_dataset_elements(workspace_id, dataset.id, [
781-
DatasetElement(name="element2", contents="element2 contents", description="element2 description"),
782-
DatasetElement(name="element3", contents="element3 contents", description="element3 description"),
781+
DatasetElement(name="element2", contents=b"element2 contents", description="element2 description"),
782+
DatasetElement(name="element3", contents=b"element3 contents", description="element3 description"),
783783
])
784784

785785
# Get the elements
786786
e1 = await gptscript.get_dataset_element(workspace_id, dataset.id, "element1")
787787
assert e1.name == "element1", "Expected element name to match"
788-
assert e1.contents == "element1 contents", "Expected element contents to match"
788+
assert e1.contents == b"element1 contents", "Expected element contents to match"
789789
assert e1.description == "element1 description", "Expected element description to match"
790790
e2 = await gptscript.get_dataset_element(workspace_id, dataset.id, "element2")
791791
assert e2.name == "element2", "Expected element name to match"
792-
assert e2.contents == "element2 contents", "Expected element contents to match"
792+
assert e2.contents == b"element2 contents", "Expected element contents to match"
793793
assert e2.description == "element2 description", "Expected element description to match"
794794
e3 = await gptscript.get_dataset_element(workspace_id, dataset.id, "element3")
795795
assert e3.name == "element3", "Expected element name to match"
796-
assert e3.contents == "element3 contents", "Expected element contents to match"
796+
assert e3.contents == b"element3 contents", "Expected element contents to match"
797797
assert e3.description == "element3 description", "Expected element description to match"
798798

799799
# List elements in the dataset

0 commit comments

Comments
 (0)