From 6d64d0d7b50312e8300cd7c62ae78d9dc0fa7ac3 Mon Sep 17 00:00:00 2001 From: Olomo Ayooluwaposi <75603128+posi-olomo@users.noreply.github.com> Date: Tue, 29 Mar 2022 19:36:57 +0100 Subject: [PATCH] Add files via upload --- Titanic_Competition.ipynb | 1 + 1 file changed, 1 insertion(+) create mode 100644 Titanic_Competition.ipynb diff --git a/Titanic_Competition.ipynb b/Titanic_Competition.ipynb new file mode 100644 index 0000000..7e4d925 --- /dev/null +++ b/Titanic_Competition.ipynb @@ -0,0 +1 @@ +{"metadata":{"kernelspec":{"language":"python","display_name":"Python 3","name":"python3"},"language_info":{"name":"python","version":"3.7.10","mimetype":"text/x-python","codemirror_mode":{"name":"ipython","version":3},"pygments_lexer":"ipython3","nbconvert_exporter":"python","file_extension":".py"}},"nbformat_minor":4,"nbformat":4,"cells":[{"cell_type":"code","source":"# This Python 3 environment comes with many helpful analytics libraries installed\n# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python\n# For example, here's several helpful packages to load\n\nimport numpy as np # linear algebra\nimport pandas as pd # data processing, CSV file I/O (e.g. 
pd.read_csv)\n\n# Input data files are available in the read-only \"../input/\" directory\n# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory\n\nimport os\nfor dirname, _, filenames in os.walk('/kaggle/input'):\n for filename in filenames:\n print(os.path.join(dirname, filename))\n\n# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using \"Save & Run All\" \n# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session","metadata":{"_uuid":"8f2839f25d086af736a60e9eeb907d3b93b6e0e5","_cell_guid":"b1076dfc-b9ad-4769-8c92-a6c4dae69d19","editable":false,"execution":{"iopub.status.busy":"2022-03-29T18:27:58.075214Z","iopub.execute_input":"2022-03-29T18:27:58.076043Z","iopub.status.idle":"2022-03-29T18:27:58.092331Z","shell.execute_reply.started":"2022-03-29T18:27:58.075929Z","shell.execute_reply":"2022-03-29T18:27:58.091246Z"},"trusted":true},"execution_count":1,"outputs":[]},{"cell_type":"code","source":"from sklearn.model_selection import train_test_split\nfrom sklearn.metrics import mean_absolute_error as mae\nfrom sklearn.metrics import roc_auc_score\nfrom sklearn.model_selection import cross_val_score\nfrom sklearn.linear_model import LogisticRegression\nfrom sklearn.metrics import confusion_matrix","metadata":{"editable":false,"execution":{"iopub.status.busy":"2022-03-29T18:27:58.099452Z","iopub.execute_input":"2022-03-29T18:27:58.100112Z","iopub.status.idle":"2022-03-29T18:27:59.211086Z","shell.execute_reply.started":"2022-03-29T18:27:58.100056Z","shell.execute_reply":"2022-03-29T18:27:59.210170Z"},"trusted":true},"execution_count":2,"outputs":[]},{"cell_type":"code","source":"df = 
pd.read_csv('/kaggle/input/titanic/train.csv')\ndf.columns","metadata":{"editable":false,"execution":{"iopub.status.busy":"2022-03-29T18:27:59.212690Z","iopub.execute_input":"2022-03-29T18:27:59.213292Z","iopub.status.idle":"2022-03-29T18:27:59.240009Z","shell.execute_reply.started":"2022-03-29T18:27:59.213245Z","shell.execute_reply":"2022-03-29T18:27:59.238749Z"},"trusted":true},"execution_count":3,"outputs":[]},{"cell_type":"code","source":"data = df.copy()\ndata.pop(\"PassengerId\")\ndata.head()","metadata":{"editable":false,"execution":{"iopub.status.busy":"2022-03-29T18:27:59.242352Z","iopub.execute_input":"2022-03-29T18:27:59.242770Z","iopub.status.idle":"2022-03-29T18:27:59.278993Z","shell.execute_reply.started":"2022-03-29T18:27:59.242726Z","shell.execute_reply":"2022-03-29T18:27:59.277884Z"},"trusted":true},"execution_count":4,"outputs":[]},{"cell_type":"code","source":"import seaborn as sns","metadata":{"execution":{"iopub.status.busy":"2022-03-29T18:27:59.280514Z","iopub.execute_input":"2022-03-29T18:27:59.280858Z","iopub.status.idle":"2022-03-29T18:27:59.358458Z","shell.execute_reply.started":"2022-03-29T18:27:59.280817Z","shell.execute_reply":"2022-03-29T18:27:59.357470Z"},"trusted":true},"execution_count":5,"outputs":[]},{"cell_type":"markdown","source":"#### Explorative Data Analysis","metadata":{"editable":false}},{"cell_type":"code","source":"data.groupby('Pclass').count()['Survived'].reset_index()","metadata":{"editable":false,"execution":{"iopub.status.busy":"2022-03-29T18:27:59.360131Z","iopub.execute_input":"2022-03-29T18:27:59.360739Z","iopub.status.idle":"2022-03-29T18:27:59.376771Z","shell.execute_reply.started":"2022-03-29T18:27:59.360685Z","shell.execute_reply":"2022-03-29T18:27:59.375972Z"},"trusted":true},"execution_count":6,"outputs":[]},{"cell_type":"code","source":"sns.catplot(x=\"Pclass\", y=\"Fare\", data=data, 
def get_cabin(v):
    """Collapse each cabin code to its first character (the deck letter), in place.

    Every string entry of ``v`` (for example ``"C85"``) is overwritten with
    its first character (``"C"``). Non-string entries, such as the NaN that
    marks a missing cabin, are left untouched.

    Parameters
    ----------
    v : pandas.Series or list
        Cabin codes. The object is modified in place.

    Returns
    -------
    The same object ``v`` that was passed in.

    Notes
    -----
    The notebook calls this as ``get_cabin(data["Cabin"])`` and discards the
    return value, i.e. it relies on the in-place write reaching ``data``.
    NOTE(review): under pandas Copy-on-Write a column selection may be a
    separate object; ``data["Cabin"] = get_cabin(data["Cabin"])`` is the
    safer call - confirm against the pandas version in use.
    """
    # Write by position: `.iloc` is correct whatever the index labels are,
    # whereas `v[pos]` on a Series is a *label* lookup and only coincides
    # with the position on a default RangeIndex. Plain lists have no `.iloc`
    # and are indexed directly.
    target = v.iloc if hasattr(v, "iloc") else v
    # Iterate over a snapshot so the writes cannot disturb the iteration.
    for position, cabin in enumerate(list(v)):
        if isinstance(cabin, str):
            target[position] = cabin[:1]
    return v


def replace_mean(data, column):
    """Fill missing values in ``data[column]`` with the column mean.

    The mean ignores missing values and is rounded to 2 decimal places
    before being used as the fill value.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame to impute; ``data[column]`` is overwritten in place.
    column : str or list of str
        One column name, or a list of names. With a list, each column is
        filled with its own mean.

    Returns
    -------
    pandas.Series or pandas.DataFrame
        ``data[column]`` after imputation.
    """
    # For a list of columns `.mean()` yields one value per column, and
    # `fillna` aligns that Series with the column names.
    mean = round(data[column].mean(axis=0, skipna=True), 2)
    data[column] = data[column].fillna(mean)
    return data[column]


def replace_mode(data, column):
    """Fill missing values in ``data[column]`` with the most frequent value.

    When several values tie for most frequent, the first one returned by
    ``mode()`` is used.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame to impute; ``data[column]`` is overwritten in place.
    column : str or list of str
        One column name, or a list of names. With a list, each column is
        filled with its own mode.

    Returns
    -------
    pandas.Series or pandas.DataFrame
        ``data[column]`` after imputation. If the selection holds no
        non-missing value there is no mode to impute with, and the data is
        returned unchanged.
    """
    modes = data[column].mode()
    if modes.empty:
        # Entirely-missing column: nothing to fill with, leave it as is.
        return data[column]
    # `.iloc[0]` is the first mode: a scalar for one column, or a
    # per-column Series (aligned by `fillna`) for a list of columns.
    data[column] = data[column].fillna(modes.iloc[0])
    return data[column]
'female'].sum()['Survived'])","metadata":{"editable":false,"execution":{"iopub.status.busy":"2022-03-29T18:28:00.045781Z","iopub.execute_input":"2022-03-29T18:28:00.046240Z","iopub.status.idle":"2022-03-29T18:28:00.061792Z","shell.execute_reply.started":"2022-03-29T18:28:00.046194Z","shell.execute_reply":"2022-03-29T18:28:00.060990Z"},"trusted":true},"execution_count":17,"outputs":[]},{"cell_type":"code","source":"#The rows that contain only \"female\"\ncc = df[df['Sex'] == 'female']\n# Show the number of women who survived according to \"Pclass\"\ncc.groupby('Pclass').sum()['Survived'].reset_index()","metadata":{"editable":false,"execution":{"iopub.status.busy":"2022-03-29T18:28:00.062813Z","iopub.execute_input":"2022-03-29T18:28:00.063240Z","iopub.status.idle":"2022-03-29T18:28:00.081020Z","shell.execute_reply.started":"2022-03-29T18:28:00.063208Z","shell.execute_reply":"2022-03-29T18:28:00.079962Z"},"trusted":true},"execution_count":18,"outputs":[]},{"cell_type":"markdown","source":"#### Survival of Men","metadata":{"editable":false}},{"cell_type":"code","source":"print(\"%d men were onboard\"% df[df['Sex'] == 'male'].count()['Survived'])\nprint(\"%d men survived\"% df[df['Sex'] == 'male'].sum()['Survived'])","metadata":{"editable":false,"execution":{"iopub.status.busy":"2022-03-29T18:28:00.082337Z","iopub.execute_input":"2022-03-29T18:28:00.082751Z","iopub.status.idle":"2022-03-29T18:28:00.099541Z","shell.execute_reply.started":"2022-03-29T18:28:00.082720Z","shell.execute_reply":"2022-03-29T18:28:00.098424Z"},"trusted":true},"execution_count":19,"outputs":[]},{"cell_type":"code","source":"#The rows that contain only \"male\"\ncd = df[df['Sex'] == 'male']\n# Show the number of men who survived according to 
def c_e_survival(data, c_e, c_e_survival_rate, embarked_survival_rate):
    """Add a column holding the survival rate of each row's category.

    Despite its name, ``embarked_survival_rate`` is not specific to the
    "Embarked" column: the notebook passes the rate tables for Cabin,
    Embarked, age bracket, Parch, Pclass, SibSp and Sex through it.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame to extend; the new column is written in place.
    c_e : str
        Name of the existing categorical column to look up.
    c_e_survival_rate : str
        Name of the column to create (or overwrite) with the rates.
    embarked_survival_rate : pandas.Series or dict
        Mapping from category value to survival rate.

    Returns
    -------
    pandas.Series
        The newly written column ``data[c_e_survival_rate]``.

    Raises
    ------
    KeyError
        If ``data[c_e]`` contains a value with no entry in the rate table.
        The message lists the unseen values, and ``data`` is left
        unmodified.
    """
    # One plain dict lookup per row instead of a pandas label lookup per row.
    rate_by_category = dict(embarked_survival_rate.items())
    categories = list(data[c_e])

    # Check up front so a bad category is reported by name and the frame is
    # never left half-updated. dict.fromkeys de-duplicates, keeping order.
    unseen = [c for c in dict.fromkeys(categories) if c not in rate_by_category]
    if unseen:
        raise KeyError(
            "no survival rate available for %r value(s): %r" % (c_e, unseen)
        )

    data[c_e_survival_rate] = [rate_by_category[c] for c in categories]
    return data[c_e_survival_rate]
False).count()[\"Survived\"])\n# The number of people that survived in each Cabin\nprint(data.groupby([i for i in data[\"Cabin\"]],dropna = False).sum()[\"Survived\"])","metadata":{"execution":{"iopub.status.busy":"2022-03-29T18:28:00.142740Z","iopub.execute_input":"2022-03-29T18:28:00.143254Z","iopub.status.idle":"2022-03-29T18:28:00.173786Z","shell.execute_reply.started":"2022-03-29T18:28:00.143199Z","shell.execute_reply":"2022-03-29T18:28:00.172994Z"},"trusted":true},"execution_count":23,"outputs":[]},{"cell_type":"code","source":"Cabin_survival_rate = data.groupby(\"Cabin\").mean()[\"Survived\"]\nCabin_survival_rate","metadata":{"execution":{"iopub.status.busy":"2022-03-29T18:28:00.177272Z","iopub.execute_input":"2022-03-29T18:28:00.177697Z","iopub.status.idle":"2022-03-29T18:28:00.188533Z","shell.execute_reply.started":"2022-03-29T18:28:00.177664Z","shell.execute_reply":"2022-03-29T18:28:00.187638Z"},"trusted":true},"execution_count":24,"outputs":[]},{"cell_type":"code","source":"#Run the function\nc_e_survival(data, \"Cabin\", \"Cabin_survival_rate\", Cabin_survival_rate)","metadata":{"execution":{"iopub.status.busy":"2022-03-29T18:28:00.190325Z","iopub.execute_input":"2022-03-29T18:28:00.190939Z","iopub.status.idle":"2022-03-29T18:28:00.210217Z","shell.execute_reply.started":"2022-03-29T18:28:00.190902Z","shell.execute_reply":"2022-03-29T18:28:00.209100Z"},"trusted":true},"execution_count":25,"outputs":[]},{"cell_type":"markdown","source":"#### \"Embarked\" Column","metadata":{"editable":false}},{"cell_type":"code","source":"data[\"Embarked\"][:13]","metadata":{"editable":false,"execution":{"iopub.status.busy":"2022-03-29T18:28:00.211661Z","iopub.execute_input":"2022-03-29T18:28:00.212153Z","iopub.status.idle":"2022-03-29T18:28:00.219817Z","shell.execute_reply.started":"2022-03-29T18:28:00.212115Z","shell.execute_reply":"2022-03-29T18:28:00.218793Z"},"trusted":true},"execution_count":26,"outputs":[]},{"cell_type":"markdown","source":"#### Calculate the 
\"Embarked\" survival rate","metadata":{"editable":false}},{"cell_type":"code","source":"# The number of people from each location\nprint(data.groupby([i for i in data[\"Embarked\"]],dropna = False).count()[\"Survived\"])\n# The number of people that survived in each location \nprint(data.groupby([i for i in data[\"Embarked\"]],dropna = False).sum()[\"Survived\"])","metadata":{"execution":{"iopub.status.busy":"2022-03-29T18:28:00.221460Z","iopub.execute_input":"2022-03-29T18:28:00.222053Z","iopub.status.idle":"2022-03-29T18:28:00.244168Z","shell.execute_reply.started":"2022-03-29T18:28:00.222009Z","shell.execute_reply":"2022-03-29T18:28:00.243051Z"},"trusted":true},"execution_count":27,"outputs":[]},{"cell_type":"markdown","source":"As you can see the location of embarkment plays a huge role in your survival","metadata":{}},{"cell_type":"code","source":"embarked_survival_rate = data.groupby(\"Embarked\").mean()[\"Survived\"]\nembarked_survival_rate","metadata":{"execution":{"iopub.status.busy":"2022-03-29T18:28:00.245481Z","iopub.execute_input":"2022-03-29T18:28:00.245791Z","iopub.status.idle":"2022-03-29T18:28:00.257316Z","shell.execute_reply.started":"2022-03-29T18:28:00.245760Z","shell.execute_reply":"2022-03-29T18:28:00.256320Z"},"trusted":true},"execution_count":28,"outputs":[]},{"cell_type":"code","source":"#Run the function\nc_e_survival(data, \"Embarked\", \"Embarked_survival_rate\", 
embarked_survival_rate)","metadata":{"execution":{"iopub.status.busy":"2022-03-29T18:28:00.258393Z","iopub.execute_input":"2022-03-29T18:28:00.258905Z","iopub.status.idle":"2022-03-29T18:28:00.272339Z","shell.execute_reply.started":"2022-03-29T18:28:00.258841Z","shell.execute_reply":"2022-03-29T18:28:00.271258Z"},"trusted":true},"execution_count":29,"outputs":[]},{"cell_type":"code","source":"data.head()\ndata[data[\"Survived\"]==1].groupby(\"Ticket\").count()","metadata":{"editable":false,"execution":{"iopub.status.busy":"2022-03-29T18:28:00.273394Z","iopub.execute_input":"2022-03-29T18:28:00.273673Z","iopub.status.idle":"2022-03-29T18:28:00.311664Z","shell.execute_reply.started":"2022-03-29T18:28:00.273647Z","shell.execute_reply":"2022-03-29T18:28:00.310489Z"},"trusted":true},"execution_count":30,"outputs":[]},{"cell_type":"markdown","source":"#### Calculate the \"Age\" survival rate","metadata":{"execution":{"iopub.status.busy":"2022-03-27T12:25:31.182466Z","iopub.execute_input":"2022-03-27T12:25:31.182775Z","iopub.status.idle":"2022-03-27T12:25:31.186727Z","shell.execute_reply.started":"2022-03-27T12:25:31.18275Z","shell.execute_reply":"2022-03-27T12:25:31.185939Z"}}},{"cell_type":"code","source":"# Check how many ages are in the data\ndata[\"Age\"].unique()","metadata":{"execution":{"iopub.status.busy":"2022-03-29T18:28:00.312799Z","iopub.execute_input":"2022-03-29T18:28:00.313113Z","iopub.status.idle":"2022-03-29T18:28:00.320102Z","shell.execute_reply.started":"2022-03-29T18:28:00.313085Z","shell.execute_reply":"2022-03-29T18:28:00.319281Z"},"trusted":true},"execution_count":31,"outputs":[]},{"cell_type":"code","source":"sns.catplot(x= \"Age\", y = \"Survived\", data=data, height =11, aspect 
=2)","metadata":{"execution":{"iopub.status.busy":"2022-03-29T18:28:00.321184Z","iopub.execute_input":"2022-03-29T18:28:00.321628Z","iopub.status.idle":"2022-03-29T18:28:03.442188Z","shell.execute_reply.started":"2022-03-29T18:28:00.321596Z","shell.execute_reply":"2022-03-29T18:28:03.440908Z"},"trusted":true},"execution_count":32,"outputs":[]},{"cell_type":"code","source":"[(-np.inf, 1),(2, 5),(6, 16), (17, 27), (28, 49), (50, 69), (70, np.inf)]","metadata":{"execution":{"iopub.status.busy":"2022-03-29T18:28:03.443596Z","iopub.execute_input":"2022-03-29T18:28:03.443935Z","iopub.status.idle":"2022-03-29T18:28:03.451401Z","shell.execute_reply.started":"2022-03-29T18:28:03.443903Z","shell.execute_reply":"2022-03-29T18:28:03.450545Z"},"trusted":true},"execution_count":33,"outputs":[]},{"cell_type":"code","source":"# Check how many ages are in the data\ndata[\"Age\"].unique()","metadata":{"execution":{"iopub.status.busy":"2022-03-29T18:28:03.452651Z","iopub.execute_input":"2022-03-29T18:28:03.453248Z","iopub.status.idle":"2022-03-29T18:28:03.465053Z","shell.execute_reply.started":"2022-03-29T18:28:03.453208Z","shell.execute_reply":"2022-03-29T18:28:03.463989Z"},"trusted":true},"execution_count":34,"outputs":[]},{"cell_type":"code","source":"# Let's group the ages together and then calculate the survival rate\nbins = pd.IntervalIndex.from_tuples([(-np.inf, 1),(1, 5),(5, 16), (16, 27), (27, 39), (39, 49), (49, 69), (69, np.inf)])\nbins","metadata":{"execution":{"iopub.status.busy":"2022-03-29T18:28:03.466663Z","iopub.execute_input":"2022-03-29T18:28:03.467021Z","iopub.status.idle":"2022-03-29T18:28:03.480400Z","shell.execute_reply.started":"2022-03-29T18:28:03.466988Z","shell.execute_reply":"2022-03-29T18:28:03.479448Z"},"trusted":true},"execution_count":35,"outputs":[]},{"cell_type":"code","source":"data['age_bracket'] = pd.cut(data['Age'], 
bins)\ndata['age_bracket']","metadata":{"execution":{"iopub.status.busy":"2022-03-29T18:28:03.481725Z","iopub.execute_input":"2022-03-29T18:28:03.482242Z","iopub.status.idle":"2022-03-29T18:28:03.505680Z","shell.execute_reply.started":"2022-03-29T18:28:03.482209Z","shell.execute_reply":"2022-03-29T18:28:03.504420Z"},"trusted":true},"execution_count":36,"outputs":[]},{"cell_type":"code","source":"age_survival_rate = data.groupby(\"age_bracket\").mean()[\"Survived\"]\nage_survival_rate","metadata":{"execution":{"iopub.status.busy":"2022-03-29T18:28:03.507326Z","iopub.execute_input":"2022-03-29T18:28:03.507681Z","iopub.status.idle":"2022-03-29T18:28:03.520640Z","shell.execute_reply.started":"2022-03-29T18:28:03.507646Z","shell.execute_reply":"2022-03-29T18:28:03.519283Z"},"trusted":true},"execution_count":37,"outputs":[]},{"cell_type":"code","source":"c_e_survival(data, \"age_bracket\", 'age_survival_rate', age_survival_rate)","metadata":{"execution":{"iopub.status.busy":"2022-03-29T18:28:03.522020Z","iopub.execute_input":"2022-03-29T18:28:03.522369Z","iopub.status.idle":"2022-03-29T18:28:03.701162Z","shell.execute_reply.started":"2022-03-29T18:28:03.522334Z","shell.execute_reply":"2022-03-29T18:28:03.700399Z"},"trusted":true},"execution_count":38,"outputs":[]},{"cell_type":"markdown","source":"#### Calculate the \"Parch\" survival rate","metadata":{}},{"cell_type":"code","source":"data.groupby(\"Parch\").count()","metadata":{"execution":{"iopub.status.busy":"2022-03-29T18:28:03.702150Z","iopub.execute_input":"2022-03-29T18:28:03.702539Z","iopub.status.idle":"2022-03-29T18:28:03.723445Z","shell.execute_reply.started":"2022-03-29T18:28:03.702510Z","shell.execute_reply":"2022-03-29T18:28:03.722370Z"},"trusted":true},"execution_count":39,"outputs":[]},{"cell_type":"code","source":"parch_survival_rate = 
data.groupby(\"Parch\").mean()[\"Survived\"]\nparch_survival_rate","metadata":{"execution":{"iopub.status.busy":"2022-03-29T18:28:03.724664Z","iopub.execute_input":"2022-03-29T18:28:03.724987Z","iopub.status.idle":"2022-03-29T18:28:03.737150Z","shell.execute_reply.started":"2022-03-29T18:28:03.724948Z","shell.execute_reply":"2022-03-29T18:28:03.735991Z"},"trusted":true},"execution_count":40,"outputs":[]},{"cell_type":"code","source":"c_e_survival(data, \"Parch\", 'parch_survival_rate', parch_survival_rate)","metadata":{"execution":{"iopub.status.busy":"2022-03-29T18:28:03.738627Z","iopub.execute_input":"2022-03-29T18:28:03.738999Z","iopub.status.idle":"2022-03-29T18:28:03.759037Z","shell.execute_reply.started":"2022-03-29T18:28:03.738966Z","shell.execute_reply":"2022-03-29T18:28:03.758033Z"},"trusted":true},"execution_count":41,"outputs":[]},{"cell_type":"markdown","source":"#### Calculate the \"Pclass\" survival rate","metadata":{}},{"cell_type":"code","source":"data[\"Pclass\"].unique()","metadata":{"execution":{"iopub.status.busy":"2022-03-29T18:28:03.760494Z","iopub.execute_input":"2022-03-29T18:28:03.761223Z","iopub.status.idle":"2022-03-29T18:28:03.771412Z","shell.execute_reply.started":"2022-03-29T18:28:03.761175Z","shell.execute_reply":"2022-03-29T18:28:03.770156Z"},"trusted":true},"execution_count":42,"outputs":[]},{"cell_type":"code","source":"pclass_survival_rate = data.groupby(\"Pclass\").mean()[\"Survived\"]\npclass_survival_rate","metadata":{"execution":{"iopub.status.busy":"2022-03-29T18:28:03.773726Z","iopub.execute_input":"2022-03-29T18:28:03.774431Z","iopub.status.idle":"2022-03-29T18:28:03.788208Z","shell.execute_reply.started":"2022-03-29T18:28:03.774384Z","shell.execute_reply":"2022-03-29T18:28:03.787054Z"},"trusted":true},"execution_count":43,"outputs":[]},{"cell_type":"code","source":"c_e_survival(data, \"Pclass\", 'pclass_survival_rate', 
pclass_survival_rate)","metadata":{"execution":{"iopub.status.busy":"2022-03-29T18:28:03.789617Z","iopub.execute_input":"2022-03-29T18:28:03.790073Z","iopub.status.idle":"2022-03-29T18:28:03.807088Z","shell.execute_reply.started":"2022-03-29T18:28:03.790026Z","shell.execute_reply":"2022-03-29T18:28:03.806013Z"},"trusted":true},"execution_count":44,"outputs":[]},{"cell_type":"markdown","source":"#### Calculate the \"SibSp\" survival rate","metadata":{}},{"cell_type":"code","source":"data[\"SibSp\"].unique()","metadata":{"execution":{"iopub.status.busy":"2022-03-29T18:28:03.808369Z","iopub.execute_input":"2022-03-29T18:28:03.808973Z","iopub.status.idle":"2022-03-29T18:28:03.821954Z","shell.execute_reply.started":"2022-03-29T18:28:03.808926Z","shell.execute_reply":"2022-03-29T18:28:03.820768Z"},"trusted":true},"execution_count":45,"outputs":[]},{"cell_type":"code","source":"sibsp_survival_rate = data.groupby(\"SibSp\").mean()[\"Survived\"]\nsibsp_survival_rate","metadata":{"execution":{"iopub.status.busy":"2022-03-29T18:28:03.823476Z","iopub.execute_input":"2022-03-29T18:28:03.824137Z","iopub.status.idle":"2022-03-29T18:28:03.839933Z","shell.execute_reply.started":"2022-03-29T18:28:03.824086Z","shell.execute_reply":"2022-03-29T18:28:03.838756Z"},"trusted":true},"execution_count":46,"outputs":[]},{"cell_type":"code","source":"c_e_survival(data, \"SibSp\", 'sibsp_survival_rate', sibsp_survival_rate)","metadata":{"execution":{"iopub.status.busy":"2022-03-29T18:28:03.841228Z","iopub.execute_input":"2022-03-29T18:28:03.841965Z","iopub.status.idle":"2022-03-29T18:28:03.857458Z","shell.execute_reply.started":"2022-03-29T18:28:03.841928Z","shell.execute_reply":"2022-03-29T18:28:03.856350Z"},"trusted":true},"execution_count":47,"outputs":[]},{"cell_type":"markdown","source":"#### Calculate the \"Sex\" survival rate","metadata":{}},{"cell_type":"code","source":"sex_survival_rate = 
# Keep only the last space-separated piece of each ticket, e.g.
# "A/5 21171" -> "21171". Taking the last piece also handles tickets whose
# prefix itself contains spaces; a ticket with no space is kept whole,
# because splitting it yields a single piece.
g = data["Ticket"].str.split(" ").str[-1].tolist()
len(g)

data["Ticket_num"] = g

# Check the number of unique numbers
data["Ticket_num"].describe()

# Tickets recorded as "LINE" have no numeric part; give them a number.
# NOTE(review): "1601" is an unexplained placeholder - confirm the intent.
# The result is assigned back to the column: an in-place `replace` on the
# selection `data["Ticket_num"]` is not guaranteed to write through to `data`.
data["Ticket_num"] = data["Ticket_num"].replace("LINE", "1601")

# Sanity check: no "LINE" tickets should remain (expect an empty frame).
data[data["Ticket_num"] == "LINE"]
X[features]\n\n","metadata":{"execution":{"iopub.status.busy":"2022-03-29T18:28:13.367717Z","iopub.execute_input":"2022-03-29T18:28:13.368066Z","iopub.status.idle":"2022-03-29T18:28:13.378857Z","shell.execute_reply.started":"2022-03-29T18:28:13.368030Z","shell.execute_reply":"2022-03-29T18:28:13.377741Z"},"trusted":true},"execution_count":58,"outputs":[]},{"cell_type":"code","source":"from sklearn_evaluation import plot","metadata":{"execution":{"iopub.status.busy":"2022-03-29T18:28:13.380191Z","iopub.execute_input":"2022-03-29T18:28:13.380639Z","iopub.status.idle":"2022-03-29T18:28:13.587983Z","shell.execute_reply.started":"2022-03-29T18:28:13.380594Z","shell.execute_reply":"2022-03-29T18:28:13.586804Z"},"trusted":true},"execution_count":59,"outputs":[]},{"cell_type":"code","source":"# Fixed seed so the split, the fitted forest and every metric below are reproducible on re-run\nSEED = 42\nX_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=SEED)\n\nmodel = RandomForestClassifier(random_state=SEED)\nmodel.fit(X_train, y_train)\n\n# plot all features\nax = plot.feature_importances(model)","metadata":{"execution":{"iopub.status.busy":"2022-03-29T18:28:13.589211Z","iopub.execute_input":"2022-03-29T18:28:13.589493Z","iopub.status.idle":"2022-03-29T18:28:14.030235Z","shell.execute_reply.started":"2022-03-29T18:28:13.589465Z","shell.execute_reply":"2022-03-29T18:28:14.029142Z"},"trusted":true},"execution_count":60,"outputs":[]},{"cell_type":"code","source":"# Number the feature columns starting at 1\nfor n, i in enumerate(X.columns, start=1):\n    print(n, i)","metadata":{"execution":{"iopub.status.busy":"2022-03-29T18:28:14.032860Z","iopub.execute_input":"2022-03-29T18:28:14.033325Z","iopub.status.idle":"2022-03-29T18:28:14.041012Z","shell.execute_reply.started":"2022-03-29T18:28:14.033279Z","shell.execute_reply":"2022-03-29T18:28:14.039929Z"},"trusted":true},"execution_count":61,"outputs":[]},{"cell_type":"code","source":"from sklearn.metrics import confusion_matrix, classification_report\n\n\n# Compute predictions on the held-out split: y_pred\ny_pred = model.predict(X_test)\n\n# Print the confusion matrix, then the per-class classification report\nprint(confusion_matrix(y_test, 
y_pred))\nprint(classification_report(y_test, y_pred))\n\n","metadata":{"execution":{"iopub.status.busy":"2022-03-29T18:28:14.042376Z","iopub.execute_input":"2022-03-29T18:28:14.043109Z","iopub.status.idle":"2022-03-29T18:28:14.084015Z","shell.execute_reply.started":"2022-03-29T18:28:14.043047Z","shell.execute_reply":"2022-03-29T18:28:14.083057Z"},"trusted":true},"execution_count":62,"outputs":[]},{"cell_type":"code","source":"print(cross_val_score(model, X, y, cv=3, scoring=\"f1\"))","metadata":{"execution":{"iopub.status.busy":"2022-03-29T18:28:14.085583Z","iopub.execute_input":"2022-03-29T18:28:14.086313Z","iopub.status.idle":"2022-03-29T18:28:14.824749Z","shell.execute_reply.started":"2022-03-29T18:28:14.086264Z","shell.execute_reply":"2022-03-29T18:28:14.823664Z"},"trusted":true},"execution_count":63,"outputs":[]},{"cell_type":"code","source":"# Load the competition test set; Id keeps an untouched copy whose PassengerId column is used for the submission\ntest = pd.read_csv('/kaggle/input/titanic/test.csv')\ntest_data = test.copy()\nId = test.copy()\n# Preview goes last: a cell only displays the value of its final expression\ntest_data.head()","metadata":{"execution":{"iopub.status.busy":"2022-03-29T18:28:14.826141Z","iopub.execute_input":"2022-03-29T18:28:14.826720Z","iopub.status.idle":"2022-03-29T18:28:14.843380Z","shell.execute_reply.started":"2022-03-29T18:28:14.826664Z","shell.execute_reply":"2022-03-29T18:28:14.842264Z"},"trusted":true},"execution_count":64,"outputs":[]},{"cell_type":"code","source":"get_cabin(test_data[\"Cabin\"])","metadata":{"execution":{"iopub.status.busy":"2022-03-29T18:28:14.844501Z","iopub.execute_input":"2022-03-29T18:28:14.844798Z","iopub.status.idle":"2022-03-29T18:28:14.901692Z","shell.execute_reply.started":"2022-03-29T18:28:14.844770Z","shell.execute_reply":"2022-03-29T18:28:14.900565Z"},"trusted":true},"execution_count":65,"outputs":[]},{"cell_type":"code","source":"replace_mean(test_data, [\"Age\", \"Fare\"])\n\nreplace_mode(test_data, 
\"Cabin\")","metadata":{"execution":{"iopub.status.busy":"2022-03-29T18:28:14.903304Z","iopub.execute_input":"2022-03-29T18:28:14.903721Z","iopub.status.idle":"2022-03-29T18:28:14.919475Z","shell.execute_reply.started":"2022-03-29T18:28:14.903674Z","shell.execute_reply":"2022-03-29T18:28:14.918485Z"},"trusted":true},"execution_count":66,"outputs":[]},{"cell_type":"code","source":"# How many missing values are in each column\nfor i in test_data.columns:\n print(i, sum(test_data[i].isnull()))\ntest_data.shape","metadata":{"execution":{"iopub.status.busy":"2022-03-29T18:28:14.920826Z","iopub.execute_input":"2022-03-29T18:28:14.921184Z","iopub.status.idle":"2022-03-29T18:28:14.942695Z","shell.execute_reply.started":"2022-03-29T18:28:14.921155Z","shell.execute_reply":"2022-03-29T18:28:14.942001Z"},"trusted":true},"execution_count":67,"outputs":[]},{"cell_type":"markdown","source":"#### Change \"Name\" to length of \"Name\"","metadata":{}},{"cell_type":"code","source":"test_data[\"Name\"] = [len(i) for i in test_data[\"Name\"]]\ntest_data[\"Name\"]","metadata":{"execution":{"iopub.status.busy":"2022-03-29T18:28:14.944085Z","iopub.execute_input":"2022-03-29T18:28:14.944606Z","iopub.status.idle":"2022-03-29T18:28:14.953334Z","shell.execute_reply.started":"2022-03-29T18:28:14.944572Z","shell.execute_reply":"2022-03-29T18:28:14.952560Z"},"trusted":true},"execution_count":68,"outputs":[]},{"cell_type":"markdown","source":"#### Cabin Survival Rate","metadata":{"execution":{"iopub.status.busy":"2022-03-28T20:37:31.226284Z","iopub.execute_input":"2022-03-28T20:37:31.226975Z","iopub.status.idle":"2022-03-28T20:37:31.238028Z","shell.execute_reply.started":"2022-03-28T20:37:31.226926Z","shell.execute_reply":"2022-03-28T20:37:31.23686Z"}}},{"cell_type":"code","source":"c_e_survival(test_data, \"Cabin\", \"Cabin_survival_rate\", 
Cabin_survival_rate)","metadata":{"execution":{"iopub.status.busy":"2022-03-29T18:28:14.954527Z","iopub.execute_input":"2022-03-29T18:28:14.955041Z","iopub.status.idle":"2022-03-29T18:28:14.971095Z","shell.execute_reply.started":"2022-03-29T18:28:14.955001Z","shell.execute_reply":"2022-03-29T18:28:14.969939Z"},"trusted":true},"execution_count":69,"outputs":[]},{"cell_type":"markdown","source":"#### Embarked Survival Rate","metadata":{}},{"cell_type":"code","source":"c_e_survival(test_data, \"Embarked\", \"Embarked_survival_rate\", embarked_survival_rate)","metadata":{"execution":{"iopub.status.busy":"2022-03-29T18:28:14.972490Z","iopub.execute_input":"2022-03-29T18:28:14.972841Z","iopub.status.idle":"2022-03-29T18:28:14.991768Z","shell.execute_reply.started":"2022-03-29T18:28:14.972807Z","shell.execute_reply":"2022-03-29T18:28:14.990924Z"},"trusted":true},"execution_count":70,"outputs":[]},{"cell_type":"markdown","source":"#### Age Survival Rate","metadata":{}},{"cell_type":"code","source":"# Age brackets: consecutive intervals built by pairing each boundary value with the next one\nage_edges = [-np.inf, 1, 5, 16, 27, 39, 49, 69, np.inf]\nbins = pd.IntervalIndex.from_tuples(list(zip(age_edges[:-1], age_edges[1:])))\nbins","metadata":{"execution":{"iopub.status.busy":"2022-03-29T18:28:14.993027Z","iopub.execute_input":"2022-03-29T18:28:14.993538Z","iopub.status.idle":"2022-03-29T18:28:15.002279Z","shell.execute_reply.started":"2022-03-29T18:28:14.993487Z","shell.execute_reply":"2022-03-29T18:28:15.001280Z"},"trusted":true},"execution_count":71,"outputs":[]},{"cell_type":"code","source":"# Assign every passenger to an age bracket before the bracket-based lookup\ntest_data[\"age_bracket\"] = pd.cut(test_data[\"Age\"], bins=bins)\nc_e_survival(test_data, \"age_bracket\", \"age_survival_rate\", 
age_survival_rate)","metadata":{"execution":{"iopub.status.busy":"2022-03-29T18:28:15.003490Z","iopub.execute_input":"2022-03-29T18:28:15.004038Z","iopub.status.idle":"2022-03-29T18:28:15.100177Z","shell.execute_reply.started":"2022-03-29T18:28:15.004000Z","shell.execute_reply":"2022-03-29T18:28:15.099104Z"},"trusted":true},"execution_count":72,"outputs":[]},{"cell_type":"markdown","source":"#### Pclass Survival Rate","metadata":{}},{"cell_type":"code","source":"c_e_survival(test_data, \"Pclass\", \"pclass_survival_rate\", pclass_survival_rate)","metadata":{"execution":{"iopub.status.busy":"2022-03-29T18:28:15.101390Z","iopub.execute_input":"2022-03-29T18:28:15.101818Z","iopub.status.idle":"2022-03-29T18:28:15.112762Z","shell.execute_reply.started":"2022-03-29T18:28:15.101785Z","shell.execute_reply":"2022-03-29T18:28:15.111966Z"},"trusted":true},"execution_count":73,"outputs":[]},{"cell_type":"markdown","source":"#### SibSp survival rate","metadata":{}},{"cell_type":"code","source":"c_e_survival(test_data, \"SibSp\", \"sibsp_survival_rate\", sibsp_survival_rate)","metadata":{"execution":{"iopub.status.busy":"2022-03-29T18:28:15.113981Z","iopub.execute_input":"2022-03-29T18:28:15.114443Z","iopub.status.idle":"2022-03-29T18:28:15.128126Z","shell.execute_reply.started":"2022-03-29T18:28:15.114413Z","shell.execute_reply":"2022-03-29T18:28:15.127152Z"},"trusted":true},"execution_count":74,"outputs":[]},{"cell_type":"markdown","source":"#### Sex survival rate","metadata":{}},{"cell_type":"code","source":"c_e_survival(test_data, \"Sex\", \"sex_survival_rate\", sex_survival_rate)","metadata":{"execution":{"iopub.status.busy":"2022-03-29T18:28:15.129786Z","iopub.execute_input":"2022-03-29T18:28:15.130525Z","iopub.status.idle":"2022-03-29T18:28:15.145847Z","shell.execute_reply.started":"2022-03-29T18:28:15.130478Z","shell.execute_reply":"2022-03-29T18:28:15.144741Z"},"trusted":true},"execution_count":75,"outputs":[]},{"cell_type":"code","source":"# How many missing values are in 
each column\nfor column in test_data.columns:\n    n_missing = sum(test_data[column].isnull())\n    print(column, n_missing)","metadata":{"execution":{"iopub.status.busy":"2022-03-29T18:28:15.147270Z","iopub.execute_input":"2022-03-29T18:28:15.147814Z","iopub.status.idle":"2022-03-29T18:28:15.167967Z","shell.execute_reply.started":"2022-03-29T18:28:15.147780Z","shell.execute_reply":"2022-03-29T18:28:15.167195Z"},"trusted":true},"execution_count":76,"outputs":[]},{"cell_type":"code","source":"# Predict with the same feature columns the model was fitted on\npredictions = model.predict(test_data[features])","metadata":{"execution":{"iopub.status.busy":"2022-03-29T18:28:15.169355Z","iopub.execute_input":"2022-03-29T18:28:15.169835Z","iopub.status.idle":"2022-03-29T18:28:15.197361Z","shell.execute_reply.started":"2022-03-29T18:28:15.169799Z","shell.execute_reply":"2022-03-29T18:28:15.196500Z"},"trusted":true},"execution_count":77,"outputs":[]},{"cell_type":"code","source":"# Pair each PassengerId from the untouched test copy with its predicted label and write the submission file\nsubmission = {\"PassengerId\": Id[\"PassengerId\"], \"Survived\": predictions}\noutput = pd.DataFrame(submission)\noutput.to_csv('my_submission.csv', index=False)\nprint(\"Your submission was successfully saved!\")","metadata":{"execution":{"iopub.status.busy":"2022-03-29T18:28:15.198588Z","iopub.execute_input":"2022-03-29T18:28:15.199005Z","iopub.status.idle":"2022-03-29T18:28:15.208964Z","shell.execute_reply.started":"2022-03-29T18:28:15.198974Z","shell.execute_reply":"2022-03-29T18:28:15.208016Z"},"trusted":true},"execution_count":78,"outputs":[]}]} \ No newline at end of file