Update processing notebook

timmanik · timmanik · commit 9a88dfe70e88 · 2024-06-05T00:08:51.000-04:00
diff --git a/01-cancer-data-analysis/fm-ad-notebook-processing.ipynb b/01-cancer-data-analysis/fm-ad-notebook-processing.ipynb
@@ -311,7 +311,31 @@
     "# Display these records for visual inspection. Then, verify that these records complement each other in terms of null and non-null values for all columns after the first five columns.\n",
     "# In other words, if one record has NaN values in a column, the other record should have non-NaN values in that same column, and vice versa.\n",
     "# Also, if both records have NaN values in the same column, ignore that column in the comparison.\n",
-    "# If the two records complement each other, print \"The two records complement each other.\" Otherwise, print \"The two records do not complement each other.\""
+    "# If the two records complement each other, print \"The two records complement each other.\" Otherwise, print \"The two records do not complement each other.\"\n",
+    "\n",
+    "# get the records with the case_id fcd9637f-00f2-49e9-bb87-94e556d5d7eb\n",
+    "case_id_records = df[df['case_id'] == 'fcd9637f-00f2-49e9-bb87-94e556d5d7eb']\n",
+    "\n",
+    "# get the first record\n",
+    "record1 = case_id_records.iloc[0]\n",
+    "\n",
+    "# get the second record\n",
+    "record2 = case_id_records.iloc[1]\n",
+    "\n",
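+    "# compare the two records over every column after the first five; they conflict only when both values are non-null and differ\n",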
+    "complement = True\n",
+    "for column in record1.index[5:]:\n",
+    "    if record1[column] != record2[column]:\n",
+    "        if pd.isnull(record1[column]) or pd.isnull(record2[column]):\n",
+    "            continue\n",
+    "        else:\n",
+    "            complement = False\n",
+    "            break\n",
+    "\n",
+    "if complement:\n",
+    "    print(\"The two records complement each other.\")\n",
+    "else:\n",
+    "    print(\"The two records do not complement each other.\")"
    ]
   },
   {
@@ -331,7 +355,34 @@
     "# Verify that these records complement each other in terms of null and non-null values for all columns AFTER the first five columns.\n",
     "# In other words, if one record has NaN values in a column, the other records should have non-NaN values in that same column, and vice versa.\n",
     "# Print a dictionary where each 'case_id' is a key and the corresponding value is a boolean indicating whether all records with that 'case_id' perfectly complement each other in terms of null and non-null values.\n",
-    "# For example, if all records with 'case_id' = 'fcd9637f-00f2-49e9-bb87-94e556d5d7eb' perfectly complement each other, the dictionary should have the key 'fcd9637f-00f2-49e9-bb87-94e556d5d7eb' with a value of True."
+    "# For example, if all records with 'case_id' = 'fcd9637f-00f2-49e9-bb87-94e556d5d7eb' perfectly complement each other, the dictionary should have the key 'fcd9637f-00f2-49e9-bb87-94e556d5d7eb' with a value of True.\n",
+    "\n",
+    "# create a dictionary that maps each case_id to a boolean: True when no pair of its records holds differing non-null values in a compared column, False otherwise\n",
+    "case_id_dict = {}\n",
+    "\n",
+    "# iterate over all unique case_id values\n",
+    "for case_id in df['case_id'].unique():\n",
+    "    # get the records with the case_id\n",
+    "    case_id_records = df[df['case_id'] == case_id]\n",
+    "    complement = True\n",
+    "    for i in range(len(case_id_records)):\n",
+    "        record1 = case_id_records.iloc[i]\n",
+    "        for j in range(i+1, len(case_id_records)):\n",
+    "            record2 = case_id_records.iloc[j]\n",
+    "            for column in record1.index[5:]:\n",
+    "                if record1[column] != record2[column]:\n",
+    "                    if pd.isnull(record1[column]) or pd.isnull(record2[column]):\n",
+    "                        continue\n",
+    "                    else:\n",
+    "                        complement = False\n",
+    "                        break\n",
+    "            if not complement:\n",
+    "                break\n",
+    "        if not complement:\n",
+    "            break\n",
+    "    case_id_dict[case_id] = complement\n",
+    "\n",
+    "case_id_dict"
    ]
   },
   {