-
Notifications
You must be signed in to change notification settings - Fork 9
/
Copy pathINPUT_SCHEMA.json
222 lines (222 loc) · 10.2 KB
/
INPUT_SCHEMA.json
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
{
"title": "Images upload input",
"type": "object",
"schemaVersion": 1,
"required": [],
"properties": {
"datasetId": {
"title": "Dataset Id",
"type": "string",
"description": "Id of the dataset where the data are located. Image URLs will be extracted from there.",
"editor": "textfield"
},
"items": {
"title": "Items",
"type": "array",
"description": "Array of items that includes the image URLs.",
"editor": "hidden"
},
"pathToImageUrls": {
"title": "Path to image URLs",
"type": "string",
"description": "Path from item object to an array or string where the URL(s) is/are located. Provide in \"javascript style\", e.g. \"details[0].images\n",
"editor": "textfield",
"prefill": "images"
},
"fileNameFunction": {
"title": "Filename function",
"type": "string",
"description": "Function that specifies how will be image filename created from its URL. If you keep this empty, it will be md5 hash of the URL.",
"editor": "javascript",
"prefill": "({url, md5}) => md5(url)"
},
"limit": {
"title": "Limit",
"type": "integer",
"description": "Max items to load from the dataset. Use with `offset` to paginate over the data (can reduce memory requirement of large loads).",
"minimum": 0,
"sectionCaption": "Input/Output options",
"sectionDescription": "More options for input data and where output data will be saved. Output data are transformed input data, not images. For them use upload options"
},
"offset": {
"title": "Offset",
"type": "integer",
"description": "How many items to skip from the dataset. Use with `limit` to paginate over the data (can reduce memory requirement of large loads)",
"minimum": 0
},
"outputTo": {
"title": "Output to",
"type": "string",
"description": "Where to save the data from input after possibly transforming them during the download process.",
"enum": [
"no-output",
"key-value-store",
"dataset"
],
"enumTitles": [
"No output",
"Key-value store",
"Dataset"
]
},
"outputDatasetId": {
"title": "Output dataset Name or ID",
"type": "string",
"description": "Name or ID of the dataset where the data will be saved. Only relevant if you want to output to dataset!",
"editor": "textfield"
},
"storeInput": {
"title": "Key Value store input",
"type": "string",
"description": "If you want to input the data from key-value store instead of dataset. Notation: `storeId-recordKey`, e.g. - `kWdGzuXuKfYkrntWw-OUTPUT`",
"editor": "textfield"
},
"uploadTo": {
"title": "Upload to",
"type": "string",
"description": "Where do you want to upload the image files",
"enum": [
"zip-file",
"key-value-store",
"s3",
"no-upload"
],
"enumTitles": [
"Zip file (Can't be used with output to dataset)",
"Key-value store",
"S3",
"No upload"
],
"prefill": "zip-file",
"sectionCaption": "Image upload options"
},
"uploadStoreName": {
"title": "Key-value store name",
"type": "string",
"description": "Key-value store name where the images will be upload. Empty field means it will be uploaded to the default key-value store",
"editor": "textfield"
},
"s3Bucket": {
"title": "S3 Bucket",
"type": "string",
"description": "Only relevant if you want to upload to S3! Name of the bucket where to upload.",
"editor": "textfield"
},
"s3AccessKeyId": {
"title": "S3 Access key id",
"type": "string",
"description": "Only relevant if you want to upload to S3! You can create these credentials for IAM user.",
"editor": "textfield"
},
"s3SecretAccessKey": {
"title": "S3 Secret access key",
"type": "string",
"description": "Only relevant if you want to upload to S3! You can create these credentials for IAM user.",
"editor": "textfield"
},
"s3CheckIfAlreadyThere": {
"title": "Check if key is already on S3",
"type": "boolean",
"description": "This option is useful if you don't want to rewrite the same image. GET requests are also cheaper than PUT requests"
},
"preDownloadFunction": {
"title": "Pre-download function",
"type": "string",
"description": "Function that specifies how will be the data transformed before downloading the image. The input and output of the function is the whole data array. You can skip downloading images of any item if you add skipItem: true field to it.",
"editor": "javascript",
"prefill": "/* Example: We get rid of the items with price 0\n({ data }) => data.filter((item) => item.price > 0)\n*/",
"sectionCaption": "Transforming functions",
"sectionDescription": "You can use these functions to transform your data before and/or after the image download process."
},
"postDownloadFunction": {
"title": "Post-download function",
"type": "string",
"description": "Function that specifies how will be the data transformed before downloading the image. The input and output of the function is the whole data array. By default it adds either the file URL or errors array depending if the download was successfull.",
"editor": "javascript",
"prefill": "/* Example: We remove items without any successfully uploaded images.\n We also remove any image URLs that were not uploaded\n \n ({ data, state }) => {\n return data.reduce((newData, item) => {\n const downloadedImages = item.images.filter((imageUrl) => {\n return state[imageUrl] && state[imageUrl].imageUploaded;\n });\n \n if (downloadedImages.length === 0) {\n return newData;\n }\n \n return newData.concat({ ...item, images: downloadedImages });\n }, []);\n}\n*/"
},
"imageCheckMaxRetries": {
"title": "Max retries",
"type": "integer",
"description": "How many times should actor retry if the file it tries to download fails to pass the tests. Setting this too high can lead to unecessary loops.",
"minimum": 1,
"default": 6,
"sectionCaption": "Image quality check options"
},
"imageCheckType": {
"title": "Image check type",
"type": "string",
"description": "Type of the image check. If the image will not pass, the download will be retied with proxy and if that doesn't pass, the image is not uploaded.",
"enum": [
"none",
"content-type",
"image-size"
],
"prefill": "content-type"
},
"imageCheckMinSize": {
"title": "Min size in KB",
"type": "integer",
"description": "Minimum size of the image to pass the image check test",
"minimum": 1
},
"imageCheckMinWidth": {
"title": "Min width",
"type": "integer",
"description": "Minimim width of the image in pixels to pass the image check. Works only if the image check type is 'jimp'.",
"minimum": 1
},
"imageCheckMinHeight": {
"title": "Min height",
"type": "integer",
"description": "Minimim height of the image in pixels to pass the image check. Works only if the image check type is 'jimp'.",
"minimum": 1
},
"proxyConfiguration": {
"title": "Proxy configuration",
"type": "object",
"description": "Select proxies to be used.",
"prefill": {
"useApifyProxy": true
},
"editor": "proxy",
"sectionCaption": "Miscellaneous options"
},
"maxConcurrency": {
"title": "Max concurrency",
"type": "integer",
"description": "You can specify how many maximum parallel downloading/uploading requests will be running. Keep in mind that the limit is here to not overload the host server.",
"default": 40
},
"downloadTimeout": {
"title": "Download timeout in ms",
"type": "integer",
"description": "How long we will wait to download each image",
"minimum": 1000,
"default": 15000
},
"batchSize": {
"title": "Batch Size",
"type": "integer",
"description": "Number of items loaded from dataset in one batch.",
"minimum": 1,
"default": 10000
},
"convertWebpToPng": {
"title": "Convert webp to png",
"type": "boolean",
"description": "If checked, the actor will automatically convert all webp type images to standard png. This increases the size of the image."
},
"stateFields": {
"title": "State fields",
"type": "array",
"editor": "json",
"description": "You can specify fields that you want in your state to make it more readable and use less memory. By default it uses all."
},
"noDownloadRun": {
"title": "Run without download",
"type": "boolean",
"description": "If checked, the actor will not download and upload the images. Usefull for checking duplicates or transformations."
}
}
}