@@ -133,6 +133,11 @@ def __init__(self):
133
133
},
134
134
}
135
135
136
+ # these will point to the dataframes containing the respective indices, once installed
137
+ self .sm_index = None
138
+ self .sm_instance_index = None
139
+ self .clinical_index = None
140
+
136
141
# Lookup s5cmd
137
142
self .s5cmdPath = shutil .which ("s5cmd" )
138
143
if self .s5cmdPath is None :
@@ -355,7 +360,9 @@ def fetch_index(self, index_name) -> None:
355
360
# self.index[["series_aws_url", "SeriesInstanceUID"]],
356
361
# on="SeriesInstanceUID", how="left"
357
362
# )
358
- setattr (self .__class__ , index_name , index_table )
363
+ # TODO: consider switching to class variable!
364
+ # setattr(self.__class__, index_name, index_table)
365
+ setattr (self , index_name , index_table )
359
366
self .indices_overview [index_name ]["installed" ] = True
360
367
self .indices_overview [index_name ]["file_path" ] = filepath
361
368
@@ -676,6 +683,54 @@ def get_series_file_URLs(self, seriesInstanceUID, source_bucket_location="aws"):
676
683
677
684
return file_names
678
685
686
def get_instance_file_URL(self, sopInstanceUID, source_bucket_location="aws"):
    """
    Get the bucket URL of the file corresponding to a given SOPInstanceUID.

    This function will only return the URL for the Slide Microscopy (SM) instances,
    which are maintained in the `sm_instance_index` table.

    Args:
        sopInstanceUID: string containing the value of DICOM SOPInstanceUID
        source_bucket_location: string containing the source bucket location, either "aws" or "gcp"

    Returns:
        string containing the bucket URL of the file corresponding to the SOPInstanceUID,
        or None if the `sm_instance_index` table could not be installed

    Raises:
        ValueError: if the SOPInstanceUID is not present in `sm_instance_index`, or if
            the series it belongs to has no `series_aws_url` in the main index
    """

    # sm_instance_index is required to complete this operation - install it,
    # but only when it has not been loaded yet, so repeated calls stay cheap.
    if self.sm_instance_index is None:
        self.fetch_index("sm_instance_index")

    sm_instance_index = self.sm_instance_index
    if sm_instance_index is None:
        logger.error(
            "sm_instance_index could not be installed. Please install it first using fetch_index."
        )
        return None

    # A single boolean mask serves both as the existence check and the row selection.
    matching_instances = sm_instance_index[
        sm_instance_index["SOPInstanceUID"] == sopInstanceUID
    ]
    if matching_instances.empty:
        raise ValueError("SOPInstanceUID not found in IDC sm_instance_index.")

    # Merge with the main index to get series_aws_url. Only the join key and the
    # URL column are taken from the main index: nothing else is used below, and a
    # narrow right-hand side cannot introduce suffixed duplicate column names.
    selected_instance_df = pd.merge(
        matching_instances[
            ["SeriesInstanceUID", "SOPInstanceUID", "crdc_instance_uuid"]
        ],
        self.index[["SeriesInstanceUID", "series_aws_url"]],
        on="SeriesInstanceUID",
        how="left",
    )

    if source_bucket_location == "gcp":
        # replace AWS with the GCP bucket
        # NOTE(review): relies on the helper rewriting the column in place - confirm.
        self._replace_aws_with_gcp_buckets(selected_instance_df, "series_aws_url")

    instance_row = selected_instance_df.iloc[0]
    series_url = instance_row["series_aws_url"]
    if not isinstance(series_url, str):
        # The left merge yields a missing value when the series is absent from the
        # main index; fail with a clear message rather than a TypeError on slicing.
        raise ValueError(
            "SeriesInstanceUID of the requested SOPInstanceUID not found in IDC index."
        )

    # instance files are named using crdc_instance_uuid; the last character of the
    # series URL is dropped before appending the file name
    # (presumably a trailing "*" wildcard - TODO confirm against the index schema).
    return series_url[:-1] + instance_row["crdc_instance_uuid"] + ".dcm"
733
+
679
734
def get_viewer_URL (
680
735
self , seriesInstanceUID = None , studyInstanceUID = None , viewer_selector = None
681
736
):
@@ -1721,8 +1776,8 @@ def download_from_selection(
1721
1776
# If SOPInstanceUID(s) are given, we need to join the main index with the instance-level index
1722
1777
sm_instance_index = None
1723
1778
if sopInstanceUID :
1724
- if hasattr (
1725
- self , " sm_instance_index"
1779
+ if (
1780
+ self . sm_instance_index is not None
1726
1781
): # check if instance-level index is installed
1727
1782
download_df = self .sm_instance_index
1728
1783
sm_instance_index = self .sm_instance_index
@@ -2138,12 +2193,12 @@ def sql_query(self, sql_query):
2138
2193
logger .debug ("Executing SQL query: " + sql_query )
2139
2194
# TODO: find a more elegant way to automate the following: https://www.perplexity.ai/search/write-python-code-that-iterate-XY9ppywbQFSRnOpgbwx_uQ
2140
2195
index = self .index
2141
- if hasattr ( self , " sm_index" ) :
2196
+ if self . sm_index is not None :
2142
2197
sm_index = self .sm_index
2143
- if hasattr ( self , " sm_instance_index" ) :
2198
+ if self . sm_instance_index is not None :
2144
2199
sm_instance_index = self .sm_instance_index
2145
- if hasattr ( self , " clinical_index" ) :
2200
+ if self . clinical_index is not None :
2146
2201
clinical_index = self .clinical_index
2147
- if hasattr ( self , " prior_versions_index" ) :
2202
+ if self . prior_versions_index is not None :
2148
2203
prior_versions_index = self .prior_versions_index
2149
2204
return duckdb .query (sql_query ).to_df ()
0 commit comments