|
44 | 44 | "metadata": {},
|
45 | 45 | "outputs": [],
|
46 | 46 | "source": [
|
47 |
| - "# convert the combined_data.csv to dataframe called combined_df\n", |
48 |
| - "combined_df = pd.read_csv('combined_data.csv')" |
| 47 | + "# convert the combined_data.csv to dataframe called df\n", |
| 48 | + "df = pd.read_csv('combined_data.csv')" |
49 | 49 | ]
|
50 | 50 | },
|
51 | 51 | {
|
|
134 | 134 | "execution_count": null,
|
135 | 135 | "metadata": {},
|
136 | 136 | "outputs": [],
|
137 |
| - "source": [] |
| 137 | + "source": [ |
| 138 | + "# show the dictionary" |
| 139 | + ] |
| 140 | + }, |
| 141 | + { |
| 142 | + "cell_type": "code", |
| 143 | + "execution_count": null, |
| 144 | + "metadata": {}, |
| 145 | + "outputs": [], |
| 146 | + "source": [ |
| 147 | + "# show if case_id is in the columns" |
| 148 | + ] |
138 | 149 | },
|
139 | 150 | {
|
140 | 151 | "cell_type": "code",
|
141 | 152 | "execution_count": null,
|
142 | 153 | "metadata": {},
|
143 | 154 | "outputs": [],
|
144 | 155 | "source": [
|
145 |
| - "# create a copy of the current dataframe\n", |
146 | 156 | "# drop columns from the dictionary above"
|
147 | 157 | ]
|
148 | 158 | },
|
|
184 | 194 | "metadata": {},
|
185 | 195 | "outputs": [],
|
186 | 196 | "source": [
|
187 |
| - "# change values 'Unknown' to NaN in the dataframe using numpy and create a new dataframe" |
| 197 | + "# change values 'Unknown' to NaN in the dataframe using numpy" |
188 | 198 | ]
|
189 | 199 | },
|
190 | 200 | {
|
|
223 | 233 | "metadata": {},
|
224 | 234 | "outputs": [],
|
225 | 235 | "source": [
|
226 |
| - "# drop duplicate records in the dataframe and create a new dataframe\n" |
| 236 | + "# drop duplicate records in the dataframe\n" |
227 | 237 | ]
|
228 | 238 | },
|
229 | 239 | {
|
|
391 | 401 | "metadata": {},
|
392 | 402 | "outputs": [],
|
393 | 403 | "source": [
|
394 |
| - "# Check if all the values in the dictionary are True if so print \"All records complement each other.\" otherwise print \"Not all records complement each other.\"" |
| 404 | + "# Check if all the values in the dictionary are True if so print \"All records complement each other.\" otherwise print \"Not all records complement each other.\"\n", |
| 405 | + "if all(case_id_dict.values()):\n", |
| 406 | + " print(\"All records complement each other.\")\n", |
| 407 | + "else:\n", |
| 408 | + " print(\"Not all records complement each other.\")" |
395 | 409 | ]
|
396 | 410 | },
|
397 | 411 | {
|
|
407 | 421 | "metadata": {},
|
408 | 422 | "outputs": [],
|
409 | 423 | "source": [
|
410 |
| - "# Combine records with the same 'case_id' and take the first non-null value for each group. Then create a new dataframe." |
| 424 | + "# Combine records with the same 'case_id' and take the first non-null value for each group\n", |
| 425 | + "df = df.groupby('case_id').first().reset_index()" |
411 | 426 | ]
|
412 | 427 | },
|
413 | 428 | {
|
|
423 | 438 | "metadata": {},
|
424 | 439 | "outputs": [],
|
425 | 440 | "source": [
|
426 |
| - "# show the shape of the new dataframe" |
| 441 | + "# show the shape of the dataframe\n", |
| 442 | + "df.shape" |
427 | 443 | ]
|
428 | 444 | },
|
429 | 445 | {
|
|
439 | 455 | "metadata": {},
|
440 | 456 | "outputs": [],
|
441 | 457 | "source": [
|
442 |
| - "# show the number of duplicate records in the new dataframe" |
| 458 | + "# show the number of duplicate records in the dataframe\n", |
| 459 | + "df.duplicated().sum()" |
443 | 460 | ]
|
444 | 461 | },
|
445 | 462 | {
|
|
455 | 472 | "metadata": {},
|
456 | 473 | "outputs": [],
|
457 | 474 | "source": [
|
458 |
| - "# show number of unique values in each column in descending order" |
| 475 | + "# show number of unique values in each column in descending order\n", |
| 476 | + "df.nunique().sort_values(ascending=False)" |
459 | 477 | ]
|
460 | 478 | },
|
461 | 479 | {
|
|
471 | 489 | "metadata": {},
|
472 | 490 | "outputs": [],
|
473 | 491 | "source": [
|
474 |
| - "# check to see if there are any null values in the dataframe" |
| 492 | + "# check to see if there are any null values in the dataframe\n", |
| 493 | + "df.isnull().sum().sum()" |
475 | 494 | ]
|
476 | 495 | },
|
477 | 496 | {
|
|
487 | 506 | "metadata": {},
|
488 | 507 | "outputs": [],
|
489 | 508 | "source": [
|
490 |
| - "# show the number unique values of the columns that have null values" |
| 509 | + "# show the number of null values for each column that has null values\n",
| 510 | + "df.isnull().sum()[df.isnull().sum() > 0]" |
491 | 511 | ]
|
492 | 512 | },
|
493 | 513 | {
|
|
510 | 530 | "metadata": {},
|
511 | 531 | "outputs": [],
|
512 | 532 | "source": [
|
513 |
| - "# describe stats on diagnoses.age_at_diagnosis column" |
| 533 | + "# describe stats on diagnoses.age_at_diagnosis column\n", |
| 534 | + "df['diagnoses.age_at_diagnosis'].describe()" |
514 | 535 | ]
|
515 | 536 | },
|
516 | 537 | {
|
|
542 | 563 | "metadata": {},
|
543 | 564 | "outputs": [],
|
544 | 565 | "source": [
|
545 |
| - "# create a new dataframe, create a new column 'diagnoses.age_at_diagnosis_years' by dividing 'diagnoses.age_at_diagnosis' by 365, and drop the 'diagonses.age_at_diagnosis' column" |
| 566 | + "# create a new column 'diagnoses.age_at_diagnosis_years' by dividing 'diagnoses.age_at_diagnosis' by 365, and drop the 'diagnoses.age_at_diagnosis' column\n",
| 567 | + "df['diagnoses.age_at_diagnosis_years'] = df['diagnoses.age_at_diagnosis'] / 365" |
546 | 568 | ]
|
547 | 569 | },
|
548 | 570 | {
|
|
558 | 580 | "metadata": {},
|
559 | 581 | "outputs": [],
|
560 | 582 | "source": [
|
561 |
| - "# count how many records that have the value of 'diagnosis.age_at_diagnosis_years' greater or equal to 89" |
| 583 | + "# count how many records have a 'diagnoses.age_at_diagnosis_years' value greater than or equal to 89\n",
| 584 | + "(df['diagnoses.age_at_diagnosis_years'] >= 89).sum()" |
562 | 585 | ]
|
563 | 586 | },
|
564 | 587 | {
|
|
567 | 590 | "metadata": {},
|
568 | 591 | "outputs": [],
|
569 | 592 | "source": [
|
570 |
| - "# drop the record with 'diagnosis.age_at_diagnosis_years' greater or equal to 89" |
| 593 | + "# drop the records with 'diagnoses.age_at_diagnosis_years' greater than or equal to 89\n",
| 594 | + "df = df[df['diagnoses.age_at_diagnosis_years'] < 89]" |
571 | 595 | ]
|
572 | 596 | },
|
573 | 597 | {
|
|
583 | 607 | "metadata": {},
|
584 | 608 | "outputs": [],
|
585 | 609 | "source": [
|
586 |
| - "# round down the diagnoses.age_at_diagnosis_years column and convert to integer" |
| 610 | + "# round down the diagnoses.age_at_diagnosis_years column and convert to integer\n", |
| 611 | + "df['diagnoses.age_at_diagnosis_years'] = df['diagnoses.age_at_diagnosis_years'].apply(np.floor).astype(int)" |
587 | 612 | ]
|
588 | 613 | },
|
589 | 614 | {
|
|
599 | 624 | "metadata": {},
|
600 | 625 | "outputs": [],
|
601 | 626 | "source": [
|
602 |
| - "# show statistical summary of the diagnoses.age_at_diagnosis_years column" |
| 627 | + "# show statistical summary of the diagnoses.age_at_diagnosis_years column\n", |
| 628 | + "df['diagnoses.age_at_diagnosis_years'].describe()" |
603 | 629 | ]
|
604 | 630 | },
|
605 | 631 | {
|
|
615 | 641 | "metadata": {},
|
616 | 642 | "outputs": [],
|
617 | 643 | "source": [
|
618 |
| - "# drop diagnosis.age_at_diagnosis column" |
| 644 | + "# drop the diagnoses.age_at_diagnosis column\n",
| 645 | + "df.drop(columns=['diagnoses.age_at_diagnosis'], inplace=True)" |
619 | 646 | ]
|
620 | 647 | },
|
621 | 648 | {
|
|
638 | 665 | "metadata": {},
|
639 | 666 | "outputs": [],
|
640 | 667 | "source": [
|
641 |
| - "# Save dataframe to a new csv file named combined_data_cleaned.csv" |
| 668 | + "# Save dataframe to a new csv file named combined_data_cleaned.csv\n", |
| 669 | + "df.to_csv('combined_data_cleaned.csv', index=False)" |
642 | 670 | ]
|
643 | 671 | }
|
644 | 672 | ],
|
|
0 commit comments