Skip to content

Commit ba040ce

Browse files
authored
chore: update for dataset rewrite (#83)
Signed-off-by: Grant Linville <[email protected]>
1 parent df4a8c9 commit ba040ce

File tree

3 files changed

+92
-155
lines changed

3 files changed

+92
-155
lines changed

datasets.go

+42 -127
Original file line number | Diff line number | Diff line change
@@ -2,10 +2,8 @@ package gptscript
22

33
import (
44
"context"
5-
"encoding/base64"
65
"encoding/json"
76
"fmt"
8-
"os"
97
)
108

119
type DatasetElementMeta struct {
@@ -15,7 +13,8 @@ type DatasetElementMeta struct {
1513

1614
type DatasetElement struct {
1715
DatasetElementMeta `json:",inline"`
18-
Contents []byte `json:"contents"`
16+
Contents string `json:"contents"`
17+
BinaryContents []byte `json:"binaryContents"`
1918
}
2019

2120
type DatasetMeta struct {
@@ -24,34 +23,17 @@ type DatasetMeta struct {
2423
Description string `json:"description"`
2524
}
2625

27-
type Dataset struct {
28-
DatasetMeta `json:",inline"`
29-
BaseDir string `json:"baseDir,omitempty"`
30-
Elements map[string]DatasetElementMeta `json:"elements"`
31-
}
32-
3326
type datasetRequest struct {
34-
Input string `json:"input"`
35-
WorkspaceID string `json:"workspaceID"`
36-
DatasetToolRepo string `json:"datasetToolRepo"`
37-
Env []string `json:"env"`
38-
}
39-
40-
type createDatasetArgs struct {
41-
Name string `json:"datasetName"`
42-
Description string `json:"datasetDescription"`
43-
}
44-
45-
type addDatasetElementArgs struct {
46-
DatasetID string `json:"datasetID"`
47-
ElementName string `json:"elementName"`
48-
ElementDescription string `json:"elementDescription"`
49-
ElementContent string `json:"elementContent"`
27+
Input string `json:"input"`
28+
DatasetTool string `json:"datasetTool"`
29+
Env []string `json:"env"`
5030
}
5131

5232
type addDatasetElementsArgs struct {
53-
DatasetID string `json:"datasetID"`
54-
Elements []DatasetElement `json:"elements"`
33+
DatasetID string `json:"datasetID"`
34+
Name string `json:"name"`
35+
Description string `json:"description"`
36+
Elements []DatasetElement `json:"elements"`
5537
}
5638

5739
type listDatasetElementArgs struct {
@@ -60,19 +42,14 @@ type listDatasetElementArgs struct {
6042

6143
type getDatasetElementArgs struct {
6244
DatasetID string `json:"datasetID"`
63-
Element string `json:"element"`
45+
Element string `json:"name"`
6446
}
6547

66-
func (g *GPTScript) ListDatasets(ctx context.Context, workspaceID string) ([]DatasetMeta, error) {
67-
if workspaceID == "" {
68-
workspaceID = os.Getenv("GPTSCRIPT_WORKSPACE_ID")
69-
}
70-
48+
func (g *GPTScript) ListDatasets(ctx context.Context) ([]DatasetMeta, error) {
7149
out, err := g.runBasicCommand(ctx, "datasets", datasetRequest{
72-
Input: "{}",
73-
WorkspaceID: workspaceID,
74-
DatasetToolRepo: g.globalOpts.DatasetToolRepo,
75-
Env: g.globalOpts.Env,
50+
Input: "{}",
51+
DatasetTool: g.globalOpts.DatasetTool,
52+
Env: g.globalOpts.Env,
7653
})
7754
if err != nil {
7855
return nil, err
@@ -85,98 +62,42 @@ func (g *GPTScript) ListDatasets(ctx context.Context, workspaceID string) ([]Dat
8562
return datasets, nil
8663
}
8764

88-
func (g *GPTScript) CreateDataset(ctx context.Context, workspaceID, name, description string) (Dataset, error) {
89-
if workspaceID == "" {
90-
workspaceID = os.Getenv("GPTSCRIPT_WORKSPACE_ID")
91-
}
92-
93-
args := createDatasetArgs{
94-
Name: name,
95-
Description: description,
96-
}
97-
argsJSON, err := json.Marshal(args)
98-
if err != nil {
99-
return Dataset{}, fmt.Errorf("failed to marshal dataset args: %w", err)
100-
}
101-
102-
out, err := g.runBasicCommand(ctx, "datasets/create", datasetRequest{
103-
Input: string(argsJSON),
104-
WorkspaceID: workspaceID,
105-
DatasetToolRepo: g.globalOpts.DatasetToolRepo,
106-
Env: g.globalOpts.Env,
107-
})
108-
if err != nil {
109-
return Dataset{}, err
110-
}
111-
112-
var dataset Dataset
113-
if err = json.Unmarshal([]byte(out), &dataset); err != nil {
114-
return Dataset{}, err
115-
}
116-
return dataset, nil
65+
type DatasetOptions struct {
66+
Name, Description string
11767
}
11868

119-
func (g *GPTScript) AddDatasetElement(ctx context.Context, workspaceID, datasetID, elementName, elementDescription string, elementContent []byte) (DatasetElementMeta, error) {
120-
if workspaceID == "" {
121-
workspaceID = os.Getenv("GPTSCRIPT_WORKSPACE_ID")
122-
}
123-
124-
args := addDatasetElementArgs{
125-
DatasetID: datasetID,
126-
ElementName: elementName,
127-
ElementDescription: elementDescription,
128-
ElementContent: base64.StdEncoding.EncodeToString(elementContent),
129-
}
130-
argsJSON, err := json.Marshal(args)
131-
if err != nil {
132-
return DatasetElementMeta{}, fmt.Errorf("failed to marshal element args: %w", err)
133-
}
134-
135-
out, err := g.runBasicCommand(ctx, "datasets/add-element", datasetRequest{
136-
Input: string(argsJSON),
137-
WorkspaceID: workspaceID,
138-
DatasetToolRepo: g.globalOpts.DatasetToolRepo,
139-
Env: g.globalOpts.Env,
140-
})
141-
if err != nil {
142-
return DatasetElementMeta{}, err
143-
}
144-
145-
var element DatasetElementMeta
146-
if err = json.Unmarshal([]byte(out), &element); err != nil {
147-
return DatasetElementMeta{}, err
148-
}
149-
return element, nil
69+
func (g *GPTScript) CreateDatasetWithElements(ctx context.Context, elements []DatasetElement, options ...DatasetOptions) (string, error) {
70+
return g.AddDatasetElements(ctx, "", elements, options...)
15071
}
15172

152-
func (g *GPTScript) AddDatasetElements(ctx context.Context, workspaceID, datasetID string, elements []DatasetElement) error {
153-
if workspaceID == "" {
154-
workspaceID = os.Getenv("GPTSCRIPT_WORKSPACE_ID")
155-
}
156-
73+
func (g *GPTScript) AddDatasetElements(ctx context.Context, datasetID string, elements []DatasetElement, options ...DatasetOptions) (string, error) {
15774
args := addDatasetElementsArgs{
15875
DatasetID: datasetID,
15976
Elements: elements,
16077
}
78+
79+
for _, opt := range options {
80+
if opt.Name != "" {
81+
args.Name = opt.Name
82+
}
83+
if opt.Description != "" {
84+
args.Description = opt.Description
85+
}
86+
}
87+
16188
argsJSON, err := json.Marshal(args)
16289
if err != nil {
163-
return fmt.Errorf("failed to marshal element args: %w", err)
90+
return "", fmt.Errorf("failed to marshal element args: %w", err)
16491
}
16592

166-
_, err = g.runBasicCommand(ctx, "datasets/add-elements", datasetRequest{
167-
Input: string(argsJSON),
168-
WorkspaceID: workspaceID,
169-
DatasetToolRepo: g.globalOpts.DatasetToolRepo,
170-
Env: g.globalOpts.Env,
93+
return g.runBasicCommand(ctx, "datasets/add-elements", datasetRequest{
94+
Input: string(argsJSON),
95+
DatasetTool: g.globalOpts.DatasetTool,
96+
Env: g.globalOpts.Env,
17197
})
172-
return err
17398
}
17499

175-
func (g *GPTScript) ListDatasetElements(ctx context.Context, workspaceID, datasetID string) ([]DatasetElementMeta, error) {
176-
if workspaceID == "" {
177-
workspaceID = os.Getenv("GPTSCRIPT_WORKSPACE_ID")
178-
}
179-
100+
func (g *GPTScript) ListDatasetElements(ctx context.Context, datasetID string) ([]DatasetElementMeta, error) {
180101
args := listDatasetElementArgs{
181102
DatasetID: datasetID,
182103
}
@@ -186,10 +107,9 @@ func (g *GPTScript) ListDatasetElements(ctx context.Context, workspaceID, datase
186107
}
187108

188109
out, err := g.runBasicCommand(ctx, "datasets/list-elements", datasetRequest{
189-
Input: string(argsJSON),
190-
WorkspaceID: workspaceID,
191-
DatasetToolRepo: g.globalOpts.DatasetToolRepo,
192-
Env: g.globalOpts.Env,
110+
Input: string(argsJSON),
111+
DatasetTool: g.globalOpts.DatasetTool,
112+
Env: g.globalOpts.Env,
193113
})
194114
if err != nil {
195115
return nil, err
@@ -202,11 +122,7 @@ func (g *GPTScript) ListDatasetElements(ctx context.Context, workspaceID, datase
202122
return elements, nil
203123
}
204124

205-
func (g *GPTScript) GetDatasetElement(ctx context.Context, workspaceID, datasetID, elementName string) (DatasetElement, error) {
206-
if workspaceID == "" {
207-
workspaceID = os.Getenv("GPTSCRIPT_WORKSPACE_ID")
208-
}
209-
125+
func (g *GPTScript) GetDatasetElement(ctx context.Context, datasetID, elementName string) (DatasetElement, error) {
210126
args := getDatasetElementArgs{
211127
DatasetID: datasetID,
212128
Element: elementName,
@@ -217,10 +133,9 @@ func (g *GPTScript) GetDatasetElement(ctx context.Context, workspaceID, datasetI
217133
}
218134

219135
out, err := g.runBasicCommand(ctx, "datasets/get-element", datasetRequest{
220-
Input: string(argsJSON),
221-
WorkspaceID: workspaceID,
222-
DatasetToolRepo: g.globalOpts.DatasetToolRepo,
223-
Env: g.globalOpts.Env,
136+
Input: string(argsJSON),
137+
DatasetTool: g.globalOpts.DatasetTool,
138+
Env: g.globalOpts.Env,
224139
})
225140
if err != nil {
226141
return DatasetElement{}, err

datasets_test.go

+48 -26
Original file line number | Diff line number | Diff line change
@@ -2,6 +2,7 @@ package gptscript
22

33
import (
44
"context"
5+
"os"
56
"testing"
67

78
"github.com/stretchr/testify/require"
@@ -11,66 +12,87 @@ func TestDatasets(t *testing.T) {
1112
workspaceID, err := g.CreateWorkspace(context.Background(), "directory")
1213
require.NoError(t, err)
1314

15+
client, err := NewGPTScript(GlobalOptions{
16+
OpenAIAPIKey: os.Getenv("OPENAI_API_KEY"),
17+
Env: append(os.Environ(), "GPTSCRIPT_WORKSPACE_ID="+workspaceID),
18+
})
19+
require.NoError(t, err)
20+
1421
defer func() {
1522
_ = g.DeleteWorkspace(context.Background(), workspaceID)
1623
}()
1724

18-
// Create a dataset
19-
dataset, err := g.CreateDataset(context.Background(), workspaceID, "test-dataset", "This is a test dataset")
20-
require.NoError(t, err)
21-
require.Equal(t, "test-dataset", dataset.Name)
22-
require.Equal(t, "This is a test dataset", dataset.Description)
23-
require.Equal(t, 0, len(dataset.Elements))
24-
25-
// Add an element
26-
elementMeta, err := g.AddDatasetElement(context.Background(), workspaceID, dataset.ID, "test-element", "This is a test element", []byte("This is the content"))
25+
datasetID, err := client.CreateDatasetWithElements(context.Background(), []DatasetElement{
26+
{
27+
DatasetElementMeta: DatasetElementMeta{
28+
Name: "test-element-1",
29+
Description: "This is a test element 1",
30+
},
31+
Contents: "This is the content 1",
32+
},
33+
}, DatasetOptions{
34+
Name: "test-dataset",
35+
Description: "this is a test dataset",
36+
})
2737
require.NoError(t, err)
28-
require.Equal(t, "test-element", elementMeta.Name)
29-
require.Equal(t, "This is a test element", elementMeta.Description)
3038

31-
// Add two more
32-
err = g.AddDatasetElements(context.Background(), workspaceID, dataset.ID, []DatasetElement{
39+
// Add three more elements
40+
_, err = client.AddDatasetElements(context.Background(), datasetID, []DatasetElement{
3341
{
3442
DatasetElementMeta: DatasetElementMeta{
3543
Name: "test-element-2",
3644
Description: "This is a test element 2",
3745
},
38-
Contents: []byte("This is the content 2"),
46+
Contents: "This is the content 2",
3947
},
4048
{
4149
DatasetElementMeta: DatasetElementMeta{
4250
Name: "test-element-3",
4351
Description: "This is a test element 3",
4452
},
45-
Contents: []byte("This is the content 3"),
53+
Contents: "This is the content 3",
54+
},
55+
{
56+
DatasetElementMeta: DatasetElementMeta{
57+
Name: "binary-element",
58+
Description: "this element has binary contents",
59+
},
60+
BinaryContents: []byte("binary contents"),
4661
},
4762
})
4863
require.NoError(t, err)
4964

5065
// Get the first element
51-
element, err := g.GetDatasetElement(context.Background(), workspaceID, dataset.ID, "test-element")
66+
element, err := client.GetDatasetElement(context.Background(), datasetID, "test-element-1")
5267
require.NoError(t, err)
53-
require.Equal(t, "test-element", element.Name)
54-
require.Equal(t, "This is a test element", element.Description)
55-
require.Equal(t, []byte("This is the content"), element.Contents)
68+
require.Equal(t, "test-element-1", element.Name)
69+
require.Equal(t, "This is a test element 1", element.Description)
70+
require.Equal(t, "This is the content 1", element.Contents)
5671

5772
// Get the third element
58-
element, err = g.GetDatasetElement(context.Background(), workspaceID, dataset.ID, "test-element-3")
73+
element, err = client.GetDatasetElement(context.Background(), datasetID, "test-element-3")
5974
require.NoError(t, err)
6075
require.Equal(t, "test-element-3", element.Name)
6176
require.Equal(t, "This is a test element 3", element.Description)
62-
require.Equal(t, []byte("This is the content 3"), element.Contents)
77+
require.Equal(t, "This is the content 3", element.Contents)
78+
79+
// Get the binary element
80+
element, err = client.GetDatasetElement(context.Background(), datasetID, "binary-element")
81+
require.NoError(t, err)
82+
require.Equal(t, "binary-element", element.Name)
83+
require.Equal(t, "this element has binary contents", element.Description)
84+
require.Equal(t, []byte("binary contents"), element.BinaryContents)
6385

6486
// List elements in the dataset
65-
elements, err := g.ListDatasetElements(context.Background(), workspaceID, dataset.ID)
87+
elements, err := client.ListDatasetElements(context.Background(), datasetID)
6688
require.NoError(t, err)
67-
require.Equal(t, 3, len(elements))
89+
require.Equal(t, 4, len(elements))
6890

6991
// List datasets
70-
datasets, err := g.ListDatasets(context.Background(), workspaceID)
92+
datasets, err := client.ListDatasets(context.Background())
7193
require.NoError(t, err)
7294
require.Equal(t, 1, len(datasets))
95+
require.Equal(t, datasetID, datasets[0].ID)
7396
require.Equal(t, "test-dataset", datasets[0].Name)
74-
require.Equal(t, "This is a test dataset", datasets[0].Description)
75-
require.Equal(t, dataset.ID, datasets[0].ID)
97+
require.Equal(t, "this is a test dataset", datasets[0].Description)
7698
}

opts.go

+2 -2
Original file line number | Diff line number | Diff line change
@@ -11,7 +11,7 @@ type GlobalOptions struct {
1111
DefaultModelProvider string `json:"DefaultModelProvider"`
1212
CacheDir string `json:"CacheDir"`
1313
Env []string `json:"env"`
14-
DatasetToolRepo string `json:"DatasetToolRepo"`
14+
DatasetTool string `json:"DatasetTool"`
1515
WorkspaceTool string `json:"WorkspaceTool"`
1616
}
1717

@@ -46,7 +46,7 @@ func completeGlobalOptions(opts ...GlobalOptions) GlobalOptions {
4646
result.OpenAIBaseURL = firstSet(opt.OpenAIBaseURL, result.OpenAIBaseURL)
4747
result.DefaultModel = firstSet(opt.DefaultModel, result.DefaultModel)
4848
result.DefaultModelProvider = firstSet(opt.DefaultModelProvider, result.DefaultModelProvider)
49-
result.DatasetToolRepo = firstSet(opt.DatasetToolRepo, result.DatasetToolRepo)
49+
result.DatasetTool = firstSet(opt.DatasetTool, result.DatasetTool)
5050
result.WorkspaceTool = firstSet(opt.WorkspaceTool, result.WorkspaceTool)
5151
result.Env = append(result.Env, opt.Env...)
5252
}

0 commit comments

Comments
 (0)