diff --git a/3.2 Regression.ipynb b/3.2 Regression.ipynb index 49d0dbc..2f95fdc 100644 --- a/3.2 Regression.ipynb +++ b/3.2 Regression.ipynb @@ -16,7 +16,7 @@ "source": [ "\n", "# Ideas\n", - "- Load data from the last excercise (cleaned data including the best combination of MERRA-2 product and observed ground data)\n", + "- Load data from the last exercise (cleaned data including the best combination of MERRA-2 product and observed ground data)\n", "- Let try with simple prediction using Linear Regresssion, Logistic Regression, Decision Tree and RandomForest Regression\n", "- Measured the accurate (or the error) from those technique\n", "- Apply the outcome with a forecast data source such as from DarkSky" @@ -625,7 +625,7 @@ "metadata": {}, "source": [ "## Fill NaN values\n", - "- Data are likey assembled with missing values\n", + "- Data are likely assembled with missing values\n", "- Regression or machine learning works better with completed dataset" ] }, @@ -955,7 +955,7 @@ } ], "source": [ - "# convert the inputed dataset and and compared\n", + "# convert the imputed dataset and compare\n", "df2 = pd.DataFrame(data=df2full, columns=df1.columns)\n", "df2.info()" ] @@ -3149,7 +3149,7 @@ "metadata": {}, "source": [ "- so we can get a RMSE = 20 from several model, which is about 50% as the relative standard deviation\n", - "- this dataset is combined from several source, but not easy to get from a forecast product (in fact, I am struggling to get those), so we will try out a dataset with lessure feasture, " + "- this dataset is combined from several sources, but not easy to get from a forecast product (in fact, I am struggling to get those), so we will try out a dataset with fewer features, " ] }, { @@ -3157,7 +3157,7 @@ "metadata": {}, "source": [ "# DarkSky Dataset\n", - "- you can check out this API at DarkSky.net" + "- you can check out this API at [DarkSky.net](https://darksky.net/). After being acquired by Apple, the future of this open API is uncertain. 
The registration is closed as well. Alternatively, check out [OpenWeatherMap.org](https://openweathermap.org/)" ] }, { @@ -3173,7 +3173,7 @@ "metadata": {}, "outputs": [], "source": [ - "# laod data in\n", + "# load data in\n", "dk = pd.read_csv('data/darksky_hanoi_2018.csv', parse_dates=['time'], index_col=['time'])" ] }, @@ -4008,7 +4008,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "- not much worse, infact, with less parameters and get a similar outcome, that is actually encouraging" + "- not much worse, in fact, with fewer parameters we get a similar outcome, which is actually encouraging" ] }, { @@ -4036,7 +4036,8 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# RMSE" + "# RMSE\n", + "- again, this is Root Mean Squared Error. If we assume the errors are random, then the distribution of the errors around the mean value should be a normal distribution (Gaussian Distribution). Then the RMSE corresponds to the Standard Deviation (SD) of the errors. The ratio of SD to the mean value in percent is called Relative Standard Deviation." ] }, { diff --git a/PDF/3.2 Regression.pdf b/PDF/3.2 Regression.pdf index bd757ba..ecce884 100644 Binary files a/PDF/3.2 Regression.pdf and b/PDF/3.2 Regression.pdf differ