@@ -58,20 +58,19 @@ def client(cls) -> IDCClient:
58
58
return cls ._client
59
59
60
60
def __init__ (self ):
61
+ # Read main index file
61
62
file_path = idc_index_data .IDC_INDEX_PARQUET_FILEPATH
62
-
63
- # Read index file
64
63
logger .debug (f"Reading index file v{ idc_index_data .__version__ } " )
65
64
self .index = pd .read_parquet (file_path )
66
65
# self.index = self.index.astype(str).replace("nan", "")
67
66
self .index ["series_size_MB" ] = self .index ["series_size_MB" ].astype (float )
68
67
self .collection_summary = self .index .groupby ("collection_id" ).agg (
69
68
{"Modality" : pd .Series .unique , "series_size_MB" : "sum" }
70
69
)
70
+ self .indices_overview = self .list_indices ()
71
71
72
72
# Lookup s5cmd
73
73
self .s5cmdPath = shutil .which ("s5cmd" )
74
-
75
74
if self .s5cmdPath is None :
76
75
# Workaround to support environment without a properly setup PATH
77
76
# See https://github.com/Slicer/Slicer/pull/7587
@@ -80,16 +79,12 @@ def __init__(self):
80
79
if str (script ).startswith ("s5cmd/bin/s5cmd" ):
81
80
self .s5cmdPath = script .locate ().resolve (strict = True )
82
81
break
83
-
84
82
if self .s5cmdPath is None :
85
83
raise FileNotFoundError (
86
84
"s5cmd executable not found. Please install s5cmd from https://github.com/peak/s5cmd#installation"
87
85
)
88
-
89
86
self .s5cmdPath = str (self .s5cmdPath )
90
-
91
87
logger .debug (f"Found s5cmd executable: { self .s5cmdPath } " )
92
-
93
88
# ... and check it can be executed
94
89
subprocess .check_call ([self .s5cmdPath , "--help" ], stdout = subprocess .DEVNULL )
95
90
@@ -177,6 +172,105 @@ def get_idc_version():
177
172
idc_version = Version (idc_index_data .__version__ ).major
178
173
return f"v{ idc_version } "
179
174
175
@staticmethod
def _get_latest_idc_index_data_release_assets():
    """
    Retrieves a list of the latest idc-index-data release assets.

    Queries the GitHub releases API for the release whose tag equals the
    installed ``idc_index_data.__version__``. This lookup is best-effort:
    transport errors, non-200 responses and unparsable payloads are logged
    and yield an empty list instead of raising.

    Returns:
        release_assets (list): List of tuples (asset_name, asset_url).
    """
    url = f"https://api.github.com/repos/ImagingDataCommons/idc-index-data/releases/tags/{idc_index_data.__version__}"

    try:
        response = requests.get(url, timeout=30)
    except requests.exceptions.RequestException as err:
        # No response object exists on this path, so report the exception
        # itself rather than a status code.
        logger.error(f"Failed to fetch releases: {err}")
        return []

    if response.status_code != 200:
        logger.error(f"Failed to fetch releases: {response.status_code}")
        return []

    try:
        release_data = response.json()
    except ValueError as err:
        # The JSON decode errors raised by response.json() derive from ValueError.
        logger.error(f"Failed to fetch releases: {err}")
        return []

    return [
        (asset["name"], asset["browser_download_url"])
        for asset in release_data.get("assets", [])
    ]
201
+
202
def list_indices(self):
    """
    Lists all available indices including their installation status.

    Installed indices are the ``*index.parquet`` files recorded in the
    metadata of the ``idc-index-data`` distribution. Additional indices are
    the ``.parquet`` assets attached to the matching idc-index-data release
    that are not already installed. The result is also stored on
    ``self.indices_overview``.

    Returns:
        indices_overview (pd.DataFrame): DataFrame containing information per index.
    """
    indices_overview = {}
    index_dir = idc_index_data.IDC_INDEX_PARQUET_FILEPATH.parents[0]

    # Find installed indices.
    # ``files`` is None when the distribution has no file record, so fall
    # back to an empty list instead of failing on iteration.
    # NOTE(review): only files listed in the distribution metadata count as
    # installed; a file written by fetch_index() in an earlier session is
    # presumably not detected here -- confirm whether that is intended.
    for file in distribution("idc-index-data").files or []:
        if not str(file).endswith("index.parquet"):
            continue
        index_name = os.path.splitext(str(file).rsplit("/", maxsplit=1)[-1])[0]
        indices_overview[index_name] = {
            "description": None,
            "installed": True,
            "local_path": os.path.join(index_dir, f"{index_name}.parquet"),
        }

    # Find available indices from idc-index-data
    release_assets = self._get_latest_idc_index_data_release_assets()
    for asset_name, asset_url in release_assets:
        if not asset_name.endswith(".parquet"):
            continue
        asset_name = os.path.splitext(asset_name)[0]
        # Installed entries take precedence over release assets of the same name.
        if asset_name not in indices_overview:
            indices_overview[asset_name] = {
                "description": None,
                "installed": False,
                "url": asset_url,
            }

    self.indices_overview = pd.DataFrame.from_dict(indices_overview, orient="index")

    return self.indices_overview
245
+
246
def fetch_index(self, index) -> None:
    """
    Downloads requested index.

    The index must be listed in ``self.indices_overview`` and not yet be
    installed. On success the file is written next to the main index file
    and the overview row is updated with ``installed=True`` and the local
    path. Unknown indices, already installed indices, transport errors and
    non-200 responses are reported through the logger; nothing is raised
    for them.

    Args:
        index (str): Name of the index to be downloaded.
    """
    if index not in self.indices_overview.index:
        logger.error(f"Index {index} is not available and can not be fetched.")
        return

    if self.indices_overview.loc[index, "installed"]:
        logger.warning(
            f"Index {index} already installed and will not be fetched again."
        )
        return

    try:
        response = requests.get(self.indices_overview.loc[index, "url"], timeout=30)
    except requests.exceptions.RequestException as err:
        # Report transport failures the same way as HTTP failures below.
        logger.error(f"Failed to fetch index: {err}")
        return

    if response.status_code != 200:
        logger.error(f"Failed to fetch index: {response.status_code}")
        return

    filepath = os.path.join(
        idc_index_data.IDC_INDEX_PARQUET_FILEPATH.parents[0],
        f"{index}.parquet",
    )
    with open(filepath, mode="wb") as file:
        file.write(response.content)

    # Mark the index as installed only after the file has been written.
    self.indices_overview.loc[index, "installed"] = True
    self.indices_overview.loc[index, "local_path"] = filepath
273
+
180
274
def get_collections (self ):
181
275
"""
182
276
Returns the collections present in IDC
0 commit comments