Skip to content

Commit 9a88dfe

Browse files
committed
Update processing notebook
1 parent bf2c711 commit 9a88dfe

File tree

1 file changed

+53
-2
lines changed

1 file changed

+53
-2
lines changed

01-cancer-data-analysis/fm-ad-notebook-processing.ipynb

+53-2
Original file line number | Diff line number | Diff line change
@@ -311,7 +311,31 @@
311311
"# Display these records for visual inspection. Then, verify that these records complement each other in terms of null and non-null values for all columns after the first five columns.\n",
312312
"# In other words, if one record has NaN values in a column, the other record should have non-NaN values in that same column, and vice versa.\n",
313313
"# Also, if both records have NaN values in the same column, ignore that column in the comparison.\n",
314-
"# If the two records complement each other, print \"The two records complement each other.\" Otherwise, print \"The two records do not complement each other.\""
314+
"# If the two records complement each other, print \"The two records complement each other.\" Otherwise, print \"The two records do not complement each other.\"\n",
315+
"\n",
316+
"# get the records with the case_id fcd9637f-00f2-49e9-bb87-94e556d5d7eb\n",
317+
"case_id_records = df[df['case_id'] == 'fcd9637f-00f2-49e9-bb87-94e556d5d7eb']\n",
318+
"\n",
319+
"# get the first record\n",
320+
"record1 = case_id_records.iloc[0]\n",
321+
"\n",
322+
"# get the second record\n",
323+
"record2 = case_id_records.iloc[1]\n",
324+
"\n",
325+
"# compare the two records: the check fails only when both hold differing non-null values in a column after the first five\n",
326+
"complement = True\n",
327+
"for column in record1.index[5:]:\n",
328+
" if record1[column] != record2[column]:\n",
329+
" if pd.isnull(record1[column]) or pd.isnull(record2[column]):\n",
330+
" continue\n",
331+
" else:\n",
332+
" complement = False\n",
333+
" break\n",
334+
"\n",
335+
"if complement:\n",
336+
" print(\"The two records complement each other.\")\n",
337+
"else:\n",
338+
" print(\"The two records do not complement each other.\")"
315339
]
316340
},
317341
{
@@ -331,7 +355,34 @@
331355
"# Verify that these records complement each other in terms of null and non-null values for all columns AFTER the first five columns.\n",
332356
"# In other words, if one record has NaN values in a column, the other records should have non-NaN values in that same column, and vice versa.\n",
333357
"# Print a dictionary where each 'case_id' is a key and the corresponding value is a boolean indicating whether all records with that 'case_id' perfectly complement each other in terms of null and non-null values.\n",
334-
"# For example, if all records with 'case_id' = 'fcd9637f-00f2-49e9-bb87-94e556d5d7eb' perfectly complement each other, the dictionary should have the key 'fcd9637f-00f2-49e9-bb87-94e556d5d7eb' with a value of True."
358+
"# For example, if all records with 'case_id' = 'fcd9637f-00f2-49e9-bb87-94e556d5d7eb' perfectly complement each other, the dictionary should have the key 'fcd9637f-00f2-49e9-bb87-94e556d5d7eb' with a value of True.\n",
359+
"\n",
360+
"# create a dictionary to store the case_id and the boolean value\n",
361+
"case_id_dict = {}\n",
362+
"\n",
363+
"# iterate over all unique case_id values\n",
364+
"for case_id in df['case_id'].unique():\n",
365+
" # get the records with the case_id\n",
366+
" case_id_records = df[df['case_id'] == case_id]\n",
367+
" complement = True\n",
368+
" for i in range(len(case_id_records)):\n",
369+
" record1 = case_id_records.iloc[i]\n",
370+
" for j in range(i+1, len(case_id_records)):\n",
371+
" record2 = case_id_records.iloc[j]\n",
372+
" for column in record1.index[5:]:\n",
373+
" if record1[column] != record2[column]:\n",
374+
" if pd.isnull(record1[column]) or pd.isnull(record2[column]):\n",
375+
" continue\n",
376+
" else:\n",
377+
" complement = False\n",
378+
" break\n",
379+
" if not complement:\n",
380+
" break\n",
381+
" if not complement:\n",
382+
" break\n",
383+
" case_id_dict[case_id] = complement\n",
384+
"\n",
385+
"case_id_dict"
335386
]
336387
},
337388
{

0 commit comments

Comments
 (0)