|
6 | 6 | "name": "Pandas Tutorial.ipynb",
|
7 | 7 | "provenance": [],
|
8 | 8 | "collapsed_sections": [],
|
9 |
| - "authorship_tag": "ABX9TyO8dbP1sfunTXTjSivB0W/Q", |
| 9 | + "authorship_tag": "ABX9TyMZ1qeD4j0AhLXdG2qW342F", |
10 | 10 | "include_colab_link": true
|
11 | 11 | },
|
12 | 12 | "kernelspec": {
|
|
68 | 68 | "```\n",
|
69 | 69 | "!pip install pandas\n",
|
70 | 70 | "```\n",
|
71 |
| - "and press enter to install library if not available in your workspace." |
| 71 | + "and press shift + enter to install library if not available in your workspace." |
72 | 72 | ]
|
73 | 73 | },
|
74 | 74 | {
|
|
88 | 88 | "source": [
|
89 | 89 | "import pandas as pd"
|
90 | 90 | ],
|
91 |
| - "execution_count": 4, |
| 91 | + "execution_count": null, |
92 | 92 | "outputs": []
|
93 | 93 | },
|
94 | 94 | {
|
|
112 | 112 | "# df = pd.read_csv('Advertising.csv', index_col =\"TV\" ) # For custom index column\n",
|
113 | 113 | "#print(df)\n"
|
114 | 114 | ],
|
115 |
| - "execution_count": 5, |
| 115 | + "execution_count": null, |
116 | 116 | "outputs": []
|
117 | 117 | },
|
118 | 118 | {
|
|
147 | 147 | "df.head()# By default, shows first 5 rows of data\n",
|
148 | 148 | "#df.tail()# By default, shows last 5 rows of data "
|
149 | 149 | ],
|
150 |
| - "execution_count": 6, |
| 150 | + "execution_count": null, |
151 | 151 | "outputs": [
|
152 | 152 | {
|
153 | 153 | "output_type": "execute_result",
|
|
249 | 249 | "source": [
|
250 | 250 | "df.columns"
|
251 | 251 | ],
|
252 |
| - "execution_count": 7, |
| 252 | + "execution_count": null, |
253 | 253 | "outputs": [
|
254 | 254 | {
|
255 | 255 | "output_type": "execute_result",
|
|
275 | 275 | "source": [
|
276 | 276 | "df.shape"
|
277 | 277 | ],
|
278 |
| - "execution_count": 8, |
| 278 | + "execution_count": null, |
279 | 279 | "outputs": [
|
280 | 280 | {
|
281 | 281 | "output_type": "execute_result",
|
|
301 | 301 | "source": [
|
302 | 302 | "df.info()"
|
303 | 303 | ],
|
304 |
| - "execution_count": 9, |
| 304 | + "execution_count": null, |
305 | 305 | "outputs": [
|
306 | 306 | {
|
307 | 307 | "output_type": "stream",
|
|
337 | 337 | "df.describe()\n",
|
338 | 338 | "# df.describe().T # For transpose view of the same"
|
339 | 339 | ],
|
340 |
| - "execution_count": 10, |
| 340 | + "execution_count": null, |
341 | 341 | "outputs": [
|
342 | 342 | {
|
343 | 343 | "output_type": "execute_result",
|
|
467 | 467 | "df.isnull().values.any() # If any value is missing\n",
|
468 | 468 | "df.isnull().sum() # Total missing values"
|
469 | 469 | ],
|
470 |
| - "execution_count": 11, |
| 470 | + "execution_count": null, |
471 | 471 | "outputs": [
|
472 | 472 | {
|
473 | 473 | "output_type": "execute_result",
|
|
508 | 508 | "df.drop([198,199], inplace =True) # rows with index labeled as 198 and 199 will be dropped\n",
|
509 | 509 | "df.shape"
|
510 | 510 | ],
|
511 |
| - "execution_count": 12, |
| 511 | + "execution_count": null, |
512 | 512 | "outputs": [
|
513 | 513 | {
|
514 | 514 | "output_type": "execute_result",
|
|
536 | 536 | "df = df.drop(drop_list, axis=1)\n",
|
537 | 537 | "df.columns # Unnamed column will be dropped"
|
538 | 538 | ],
|
539 |
| - "execution_count": 13, |
| 539 | + "execution_count": null, |
540 | 540 | "outputs": [
|
541 | 541 | {
|
542 | 542 | "output_type": "execute_result",
|
|
563 | 563 | "df.drop(df.columns[[0, 2]], axis = 1, inplace = True)\n",
|
564 | 564 | "df.columns "
|
565 | 565 | ],
|
566 |
| - "execution_count": 14, |
| 566 | + "execution_count": null, |
567 | 567 | "outputs": [
|
568 | 568 | {
|
569 | 569 | "output_type": "execute_result",
|
|
599 | 599 | "df = pd.read_csv('Advertising.csv')\n",
|
600 | 600 | "print(df.head())"
|
601 | 601 | ],
|
602 |
| - "execution_count": 15, |
| 602 | + "execution_count": null, |
603 | 603 | "outputs": [
|
604 | 604 | {
|
605 | 605 | "output_type": "stream",
|
|
638 | 638 | "data = df[['TV', 'radio']]\n",
|
639 | 639 | "data"
|
640 | 640 | ],
|
641 |
| - "execution_count": 16, |
| 641 | + "execution_count": null, |
642 | 642 | "outputs": [
|
643 | 643 | {
|
644 | 644 | "output_type": "execute_result",
|
|
771 | 771 | "source": [
|
772 | 772 | "df.sort_values(['radio', 'TV'], ascending = False)# Set ascending true or false to get different order"
|
773 | 773 | ],
|
774 |
| - "execution_count": 17, |
| 774 | + "execution_count": null, |
775 | 775 | "outputs": [
|
776 | 776 | {
|
777 | 777 | "output_type": "execute_result",
|
|
940 | 940 | "source": [
|
941 | 941 | "df[df.radio >= 49]# Can also use >,<, <=,=="
|
942 | 942 | ],
|
943 |
| - "execution_count": 18, |
| 943 | + "execution_count": null, |
944 | 944 | "outputs": [
|
945 | 945 | {
|
946 | 946 | "output_type": "execute_result",
|
|
1053 | 1053 | "source": [
|
1054 | 1054 | "df[df['TV'].between(239,248)]"
|
1055 | 1055 | ],
|
1056 |
| - "execution_count": 19, |
| 1056 | + "execution_count": null, |
1057 | 1057 | "outputs": [
|
1058 | 1058 | {
|
1059 | 1059 | "output_type": "execute_result",
|
|
1174 | 1174 | "source": [
|
1175 | 1175 | "df.query('239 <= TV <= 248')"
|
1176 | 1176 | ],
|
1177 |
| - "execution_count": 20, |
| 1177 | + "execution_count": null, |
1178 | 1178 | "outputs": [
|
1179 | 1179 | {
|
1180 | 1180 | "output_type": "execute_result",
|
|
1295 | 1295 | "source": [
|
1296 | 1296 | "df[(df['TV'] >= 239) & (df['TV'] <= 248)]"
|
1297 | 1297 | ],
|
1298 |
| - "execution_count": 21, |
| 1298 | + "execution_count": null, |
1299 | 1299 | "outputs": [
|
1300 | 1300 | {
|
1301 | 1301 | "output_type": "execute_result",
|
|
1425 | 1425 | "source": [
|
1426 | 1426 | "df.iloc[: , 0:3]"
|
1427 | 1427 | ],
|
1428 |
| - "execution_count": 22, |
| 1428 | + "execution_count": null, |
1429 | 1429 | "outputs": [
|
1430 | 1430 | {
|
1431 | 1431 | "output_type": "execute_result",
|
|
1571 | 1571 | "# use negative to exclude the last 3 columns\n",
|
1572 | 1572 | "df.iloc[: , :-3]"
|
1573 | 1573 | ],
|
1574 |
| - "execution_count": 23, |
| 1574 | + "execution_count": null, |
1575 | 1575 | "outputs": [
|
1576 | 1576 | {
|
1577 | 1577 | "output_type": "execute_result",
|
|
1704 | 1704 | "source": [
|
1705 | 1705 | "df.iloc[0:3 , :]"
|
1706 | 1706 | ],
|
1707 |
| - "execution_count": 24, |
| 1707 | + "execution_count": null, |
1708 | 1708 | "outputs": [
|
1709 | 1709 | {
|
1710 | 1710 | "output_type": "execute_result",
|
|
1798 | 1798 | "source": [
|
1799 | 1799 | "df.iloc[:-3 , :]"
|
1800 | 1800 | ],
|
1801 |
| - "execution_count": 25, |
| 1801 | + "execution_count": null, |
1802 | 1802 | "outputs": [
|
1803 | 1803 | {
|
1804 | 1804 | "output_type": "execute_result",
|
|
1966 | 1966 | "source": [
|
1967 | 1967 | "df.sum(axis=0)#axis 1 for rows"
|
1968 | 1968 | ],
|
1969 |
| - "execution_count": 27, |
| 1969 | + "execution_count": null, |
1970 | 1970 | "outputs": [
|
1971 | 1971 | {
|
1972 | 1972 | "output_type": "execute_result",
|
|
2003 | 2003 | "source": [
|
2004 | 2004 | "df['TV'].sum()"
|
2005 | 2005 | ],
|
2006 |
| - "execution_count": 28, |
| 2006 | + "execution_count": null, |
2007 | 2007 | "outputs": [
|
2008 | 2008 | {
|
2009 | 2009 | "output_type": "execute_result",
|
|
2029 | 2029 | "source": [
|
2030 | 2030 | "df.TV.mean()"
|
2031 | 2031 | ],
|
2032 |
| - "execution_count": 29, |
| 2032 | + "execution_count": null, |
2033 | 2033 | "outputs": [
|
2034 | 2034 | {
|
2035 | 2035 | "output_type": "execute_result",
|
|
2055 | 2055 | "source": [
|
2056 | 2056 | "df.TV.median()"
|
2057 | 2057 | ],
|
2058 |
| - "execution_count": 30, |
| 2058 | + "execution_count": null, |
2059 | 2059 | "outputs": [
|
2060 | 2060 | {
|
2061 | 2061 | "output_type": "execute_result",
|
|
2081 | 2081 | "source": [
|
2082 | 2082 | "df.TV.mode()"
|
2083 | 2083 | ],
|
2084 |
| - "execution_count": 31, |
| 2084 | + "execution_count": null, |
2085 | 2085 | "outputs": [
|
2086 | 2086 | {
|
2087 | 2087 | "output_type": "execute_result",
|
|
2117 | 2117 | "source": [
|
2118 | 2118 | "df['TV'].corr(df['radio'], method='kendall')# method pearson, spearman"
|
2119 | 2119 | ],
|
2120 |
| - "execution_count": 40, |
| 2120 | + "execution_count": null, |
2121 | 2121 | "outputs": [
|
2122 | 2122 | {
|
2123 | 2123 | "output_type": "execute_result",
|
|
0 commit comments