ci: add pre-commit-update and related updates #507

Open · wants to merge 5 commits into main
Changes from all commits
23 changes: 11 additions & 12 deletions pycytominer/cyto_utils/cell_locations.py
@@ -171,20 +171,19 @@ def _download_s3(self, uri: str):
 
         bucket, key = self._parse_s3_path(uri)
 
-        tmp_file = tempfile.NamedTemporaryFile(
+        with tempfile.NamedTemporaryFile(
             delete=False, suffix=pathlib.Path(key).name
-        )
-
-        self.s3.download_file(bucket, key, tmp_file.name)
+        ) as tmp_file:
+            self.s3.download_file(bucket, key, tmp_file.name)
 
-        # Check if the downloaded file exists and has a size greater than 0
-        tmp_file_path = pathlib.Path(tmp_file.name)
-        if tmp_file_path.exists() and tmp_file_path.stat().st_size > 0:
-            return tmp_file.name
-        else:
-            raise ValueError(
-                f"Downloaded file '{tmp_file.name}' is empty or does not exist."
-            )
+            # Check if the downloaded file exists and has a size greater than 0
+            tmp_file_path = pathlib.Path(tmp_file.name)
+            if tmp_file_path.exists() and tmp_file_path.stat().st_size > 0:
+                return tmp_file.name
+            else:
+                raise ValueError(
+                    f"Downloaded file '{tmp_file.name}' is empty or does not exist."
+                )
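This hunk swaps manual `NamedTemporaryFile` bookkeeping for a context manager while keeping `delete=False`, so the file outlives the `with` block and the caller can reopen it by path. A minimal standalone sketch of the same pattern (`download_to_named_tmp` and the injected `download_fn` callable are illustrative, not pycytominer API):

```python
import pathlib
import tempfile


def download_to_named_tmp(download_fn, suffix: str) -> str:
    """Download into a named temp file that survives the `with` block."""
    # delete=False keeps the file on disk after the handle closes,
    # so the caller can reopen it by path later.
    with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp_file:
        download_fn(tmp_file.name)

        tmp_path = pathlib.Path(tmp_file.name)
        if tmp_path.exists() and tmp_path.stat().st_size > 0:
            return tmp_file.name
        raise ValueError(f"Downloaded file '{tmp_file.name}' is empty or does not exist.")
```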

def _load_metadata(self):
"""Load the metadata into a Pandas DataFrame
Expand Down
8 changes: 3 additions & 5 deletions pycytominer/cyto_utils/load.py
@@ -38,11 +38,9 @@ def is_path_a_parquet_file(file: Union[str, pathlib.PurePath]) -> bool:
     except FileNotFoundError as e:
         print("load_profiles() didn't find the path.", e, sep="\n")
 
-    # Check if file path is a parquet file
-    if file.suffix.lower() == ".parquet":
-        return True
-
-    return False
+    # return boolean based on whether
+    # file path is a parquet file
+    return file.suffix.lower() == ".parquet"
 
 
 def infer_delim(file: str):
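The collapsed conditional returns the comparison directly instead of branching to `True`/`False`; behavior is unchanged. An illustrative call (the file names are made up):

```python
import pathlib

from pycytominer.cyto_utils.load import is_path_a_parquet_file

print(is_path_a_parquet_file(pathlib.Path("profiles.parquet")))  # True
print(is_path_a_parquet_file(pathlib.Path("profiles.csv.gz")))   # False
```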
2 changes: 1 addition & 1 deletion pycytominer/operations/noise_removal.py
@@ -60,7 +60,7 @@ def noise_removal(
     # Check if the column exists
     if noise_removal_perturb_groups not in population_df.columns:
         raise ValueError(
-            'f"{perturb} not found. Are you sure it is a ' "metadata column?"
+            'f"{perturb} not found. Are you sure it is a metadata column?'
         )
     # Assign the group info to the specified column
     group_info = population_df[noise_removal_perturb_groups]
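Note that even after merging the two implicitly concatenated literals, the `f` sits inside the quotes, so this is still a plain string and `{perturb}` is printed verbatim rather than interpolated. A real f-string using the variable the check actually inspects would look like this (a hypothetical follow-up fix, not part of this PR):

```python
noise_removal_perturb_groups = "Metadata_broad_sample"  # hypothetical column name

raise ValueError(
    f"{noise_removal_perturb_groups} not found. Are you sure it is a metadata column?"
)
```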
4 changes: 2 additions & 2 deletions tests/test_aggregate.py
@@ -324,8 +324,8 @@ def test_output_type():
     parquet_df = pd.read_parquet(test_output_file_parquet)
 
     # check to make sure the files were read in corrrectly as a pd.Dataframe
-    assert type(csv_df) == pd.DataFrame
-    assert type(parquet_df) == pd.DataFrame
+    assert isinstance(csv_df, pd.DataFrame)
+    assert isinstance(parquet_df, pd.DataFrame)
 
     # check to make sure both dataframes are the same regardless of the output_type
     pd.testing.assert_frame_equal(csv_df, parquet_df)
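The same `type(...) == pd.DataFrame` to `isinstance` swap recurs in `tests/test_annotate.py`, `tests/test_consensus.py`, `tests/test_feature_select.py`, and `tests/test_normalize.py` below. `isinstance` is the idiomatic check: it also accepts subclasses and satisfies the pycodestyle/ruff E721 lint rule against exact-type comparison. A quick standalone illustration (the subclass is invented for the example):

```python
import pandas as pd


class TaggedFrame(pd.DataFrame):
    """Trivial DataFrame subclass, used only for illustration."""


df = TaggedFrame({"a": [1, 2]})

print(type(df) == pd.DataFrame)      # False: exact-type comparison rejects subclasses
print(isinstance(df, pd.DataFrame))  # True: isinstance accepts subclasses too
```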
4 changes: 2 additions & 2 deletions tests/test_annotate.py
@@ -203,8 +203,8 @@ def test_output_type():
     parquet_df = pd.read_parquet(OUTPUT_FILE_PARQUET)
 
     # check to make sure the files were read in corrrectly as a pd.Dataframe
-    assert type(csv_df) == pd.DataFrame
-    assert type(parquet_df) == pd.DataFrame
+    assert isinstance(csv_df, pd.DataFrame)
+    assert isinstance(parquet_df, pd.DataFrame)
 
     # check to make sure both dataframes are the same regardless of the output_type
     pd.testing.assert_frame_equal(csv_df, parquet_df)
4 changes: 2 additions & 2 deletions tests/test_consensus.py
@@ -119,8 +119,8 @@ def test_output_type():
     parquet_df = pd.read_parquet(output_test_file_parquet)
 
     # check to make sure the files were read in corrrectly as a pd.Dataframe
-    assert type(csv_df) == pd.DataFrame
-    assert type(parquet_df) == pd.DataFrame
+    assert isinstance(csv_df, pd.DataFrame)
+    assert isinstance(parquet_df, pd.DataFrame)
 
     # check to make sure both dataframes are the same regardless of the output_type
     pd.testing.assert_frame_equal(csv_df, parquet_df)
8 changes: 5 additions & 3 deletions tests/test_cyto_utils/test_cells.py
@@ -305,9 +305,11 @@ def test_load_compartment():
         if pd.api.types.is_float(CELLS_DF[colname].dtype)
         # check for columns which are of 'int64' type
         # note: pd.api.types.is_integer sometimes is unable to detect int64
-        or CELLS_DF[colname].dtype == "int64"
-        # avoid recasting the metadata_types
-        and colname not in metadata_types
+        or (
+            CELLS_DF[colname].dtype == "int64"
+            # avoid recasting the metadata_types
+            and colname not in metadata_types
+        )
     }
 
     # create deep copy of CELLS_DF with manually re-typed float columns as float32
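The added parentheses make the grouping explicit without changing the result: in Python, `and` binds more tightly than `or`, so the old expression already parsed as `is_float(...) or (dtype check and metadata check)`. A standalone illustration of the precedence:

```python
a, b, c = True, True, False

# `and` binds more tightly than `or`, so `a or b and c` means `a or (b and c)`.
assert (a or b and c) == (a or (b and c))   # both sides are True
assert (a or b and c) != ((a or b) and c)   # grouping the other way gives False
```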
2 changes: 1 addition & 1 deletion tests/test_cyto_utils/test_util.py
@@ -76,7 +76,7 @@ def test_check_compartments_not_valid():
 
 def test_get_default_compartments():
     default_comparments = get_default_compartments()
-    assert ["cells", "cytoplasm", "nuclei"] == default_comparments
+    assert default_comparments == ["cells", "cytoplasm", "nuclei"]
 
 
 def test_load_known_metadata_dictionary():
4 changes: 2 additions & 2 deletions tests/test_feature_select.py
@@ -489,8 +489,8 @@ def test_output_type():
     parquet_df = pd.read_parquet(output_test_file_parquet)
 
     # check to make sure the files were read in corrrectly as a pd.Dataframe
-    assert type(csv_df) == pd.DataFrame
-    assert type(parquet_df) == pd.DataFrame
+    assert isinstance(csv_df, pd.DataFrame)
+    assert isinstance(parquet_df, pd.DataFrame)
 
     # check to make sure both dataframes are the same regardless of the output_type
     pd.testing.assert_frame_equal(csv_df, parquet_df)
4 changes: 2 additions & 2 deletions tests/test_normalize.py
@@ -556,8 +556,8 @@ def test_output_type():
     parquet_df = pd.read_parquet(output_test_file_parquet)
 
     # check to make sure the files were read in corrrectly as a pd.Dataframe
-    assert type(csv_df) == pd.DataFrame
-    assert type(parquet_df) == pd.DataFrame
+    assert isinstance(csv_df, pd.DataFrame)
+    assert isinstance(parquet_df, pd.DataFrame)
 
     # check to make sure both dataframes are the same regardless of the output_type
     pd.testing.assert_frame_equal(csv_df, parquet_df)
27 changes: 14 additions & 13 deletions walkthroughs/single_cell_usage.ipynb
@@ -26,16 +26,17 @@
    "source": [
     "import pathlib\n",
     "\n",
+    "# ignore mix type warnings from pandas\n",
+    "import warnings\n",
+    "\n",
     "import pandas as pd\n",
     "\n",
+    "from pycytominer import annotate, feature_select, normalize\n",
+    "\n",
     "# pycytominer imports\n",
     "from pycytominer.cyto_utils.cells import SingleCells\n",
-    "from pycytominer import annotate, normalize, feature_select\n",
     "\n",
-    "# ignore mix type warnings from pandas\n",
-    "import warnings\n",
-    "\n",
-    "warnings.filterwarnings(\"ignore\")\n"
+    "warnings.filterwarnings(\"ignore\")"
    ]
   },

Member commented on this hunk:

> Sometimes linting changes to .ipynb files are wonky and inappropriate. Just double-clicking here to see if these changes are intentional.

Member Author replied:

> Thanks - this slightly changed some of the formatting but in general the notebook looked sound. I'll do a triple-check here to see if I can run the notebook.
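Decoded from the JSON string escapes, the reordered import cell reads as the plain Python below (grouped stdlib, then third-party, then first-party, the ordering an isort-style import sorter enforces):

```python
import pathlib

# ignore mix type warnings from pandas
import warnings

import pandas as pd

from pycytominer import annotate, feature_select, normalize

# pycytominer imports
from pycytominer.cyto_utils.cells import SingleCells

warnings.filterwarnings("ignore")
```

Apart from the `sqlite_address` change below, the remaining notebook hunks each strip only the trailing `\n` from the last entry of a cell's source array, which looks like formatter normalization of cell endings rather than a content change.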
@@ -86,7 +87,7 @@
     "sc_profiles_path = out_dir / \"nf1_single_cell_profile.csv.gz\"\n",
     "anno_profiles_path = out_dir / \"nf1_annotated_profile.csv.gz\"\n",
     "norm_profiles_path = out_dir / \"nf1_normalized_profile.csv.gz\"\n",
-    "feat_profiles_path = out_dir / \"nf1_features_profile.csv.gz\"\n"
+    "feat_profiles_path = out_dir / \"nf1_features_profile.csv.gz\""
    ]
   },
@@ -128,7 +129,7 @@
     " },\n",
     " \"Per_Cells\": {\"Per_Cytoplasm\": \"Cells_Number_Object_Number\"},\n",
     " \"Per_Nuclei\": {\"Per_Cytoplasm\": \"Nuclei_Number_Object_Number\"},\n",
-    "}\n"
+    "}"
    ]
   },
@@ -159,7 +160,7 @@
    ],
    "source": [
     "# setting up sqlite address\n",
-    "sqlite_address = f\"sqlite:///{str(plate_data)}\"\n",
+    "sqlite_address = f\"sqlite:///{plate_data!s}\"\n",
     "\n",
     "# loading single cell morphology data into pycyotminer's SingleCells Object\n",
     "single_cell_profile = SingleCells(\n",
@@ -177,7 +178,7 @@
     "# compressed csv file\n",
     "single_cell_profile.merge_single_cells(\n",
     "    sc_output_file=sc_profiles_path, compression_options=\"gzip\"\n",
-    ")\n"
+    ")"
    ]
   },
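The `{plate_data!s}` conversion is equivalent to wrapping the value in `str()`: the `!s` flag tells the f-string to apply `str()` to the interpolated object, the form ruff's RUF010 rule prefers. A standalone check (the path is made up):

```python
from pathlib import Path

plate_data = Path("data/nf1_plate.sqlite")  # hypothetical plate file

# `!s` applies str() during interpolation, so both forms are identical:
assert f"sqlite:///{plate_data!s}" == f"sqlite:///{str(plate_data)}"
print(f"sqlite:///{plate_data!s}")  # sqlite:///data/nf1_plate.sqlite
```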
@@ -208,7 +209,7 @@
     "platemap_df = pd.read_csv(plate_map)\n",
     "\n",
     "# displaying platemap\n",
-    "print(platemap_df.columns.tolist())\n"
+    "print(platemap_df.columns.tolist())"
    ]
   },
@@ -249,7 +250,7 @@
     ")\n",
     "\n",
     "# save message display\n",
-    "print(f\"Annotated profile saved in: {anno_profiles_path}\")\n"
+    "print(f\"Annotated profile saved in: {anno_profiles_path}\")"
    ]
   },
@@ -296,7 +297,7 @@
     ")\n",
     "\n",
     "# save message display\n",
-    "print(f\"Normalized profile saved in: {norm_profiles_path}\")\n"
+    "print(f\"Normalized profile saved in: {norm_profiles_path}\")"
    ]
   },
@@ -341,7 +342,7 @@
     ")\n",
     "\n",
     "# save message display\n",
-    "print(f\"Selected features profile saved in: {feat_profiles_path}\")\n"
+    "print(f\"Selected features profile saved in: {feat_profiles_path}\")"
    ]
   },