georgia-tech-db · americast · Oct 5, 2023 · Oct 20, 2023 · Oct 20, 2023 · Oct 24, 2023
diff --git a/.circleci/config.yml b/.circleci/config.yml
@@ -480,7 +480,7 @@ jobs:
         - checkout
         - run:           
             name: Install EvaDB package from GitHub repo and run tests
-            no_output_timeout: 30m # 30 minute timeout
+            no_output_timeout: 40m # 40 minute timeout
             command: |
               python -m venv test_evadb
               source test_evadb/bin/activate

diff --git a/docs/source/reference/ai/model-forecasting.rst b/docs/source/reference/ai/model-forecasting.rst
@@ -75,7 +75,7 @@ EvaDB's default forecast framework is `statsforecast <https://nixtla.github.io/s
 
 .. note::
 
-   `Forecasting` function also logs suggestions. Logged information, such as metrics and suggestions, is sent to STDOUT by default. If you wish not to print it, please send `FALSE` as an optional argument while calling the function. Eg. `SELECT Forecast(FALSE);`
+   `Forecasting` function also logs suggestions. Logged information, such as metrics and suggestions, is sent to STDOUT by default. A figure is also plotted and saved in a binary format supported by OpenCV in the `plot` column of the output table. It may be rendered using the `cv2.imdecode` function. If you do not wish to obtain the logged information, please pass `FALSE` as an optional argument while calling the function. E.g., `SELECT Forecast(FALSE);`
 
 
 Below is an example query specifying the above parameters:

diff --git a/evadb/binder/statement_binder.py b/evadb/binder/statement_binder.py
@@ -29,7 +29,6 @@
 from evadb.binder.statement_binder_context import StatementBinderContext
 from evadb.catalog.catalog_type import ColumnType, TableType
 from evadb.catalog.catalog_utils import is_document_table
-from evadb.catalog.sql_config import RESTRICTED_COL_NAMES
 from evadb.expression.abstract_expression import AbstractExpression, ExpressionType
 from evadb.expression.function_expression import FunctionExpression
 from evadb.expression.tuple_value_expression import TupleValueExpression
@@ -137,6 +136,12 @@ def _bind_create_function_statement(self, node: CreateFunctionStatement):
                             None,
                             None,
                         ),
+                        ColumnDefinition(
+                            "plot",
+                            ColumnType.ANY,
+                            None,
+                            None,
+                        ),
                     ]
                 )
             else:
@@ -211,12 +216,6 @@ def _bind_delete_statement(self, node: DeleteTableStatement):
 
     @bind.register(CreateTableStatement)
     def _bind_create_statement(self, node: CreateTableStatement):
-        # we don't allow certain keywords in the column_names
-        for col in node.column_list:
-            assert (
-                col.name.lower() not in RESTRICTED_COL_NAMES
-            ), f"EvaDB does not allow to create a table with column name {col.name}"
-
         if node.query is not None:
             self.bind(node.query)
 

diff --git a/evadb/executor/create_function_executor.py b/evadb/executor/create_function_executor.py
@@ -555,7 +555,7 @@ def get_optuna_config(trial):
                 raise FunctionIODefinitionError(err_msg)
 
             model = StatsForecast(
-                [model_here(season_length=season_length)], freq=new_freq
+                [model_here(season_length=season_length)], freq=new_freq, n_jobs=-1
             )
 
         data["ds"] = pd.to_datetime(data["ds"])
@@ -668,6 +668,8 @@ def get_optuna_config(trial):
             model_path = os.path.join(model_dir, existing_model_files[-1])
         io_list = self._resolve_function_io(None)
         data["ds"] = data.ds.astype(str)
+        last_ds = list(data["ds"])[-2 * horizon :]
+        last_y = list(data["y"])[-2 * horizon :]
         metadata_here = [
             FunctionMetadataCatalogEntry("model_name", arg_map["model"]),
             FunctionMetadataCatalogEntry("model_path", model_path),
@@ -683,6 +685,8 @@ def get_optuna_config(trial):
             FunctionMetadataCatalogEntry("horizon", horizon),
             FunctionMetadataCatalogEntry("library", library),
             FunctionMetadataCatalogEntry("conf", conf),
+            FunctionMetadataCatalogEntry("last_ds", last_ds),
+            FunctionMetadataCatalogEntry("last_y", last_y),
         ]
 
         return (

diff --git a/evadb/expression/function_expression.py b/evadb/expression/function_expression.py
@@ -130,8 +130,12 @@ def evaluate(self, batch: Batch, **kwargs) -> Batch:
 
             # process outcomes only if output is not empty
             if outcomes.frames.empty is False:
-                outcomes = outcomes.project(self.projection_columns)
-                outcomes.modify_column_alias(self.alias)
+                if self._function().name == "ForecastModel":
+                    outcomes = outcomes.project(self.projection_columns, forecast=True)
+                    outcomes.modify_column_alias(self.alias, forecast=True)
+                else:
+                    outcomes = outcomes.project(self.projection_columns)
+                    outcomes.modify_column_alias(self.alias)
 
         # record the number of function calls
         self._stats.num_calls += len(batch)

diff --git a/evadb/functions/forecast.py b/evadb/functions/forecast.py
@@ -17,6 +17,9 @@
 import os
 import pickle
 
+import cv2
+import matplotlib.pyplot as plt
+import numpy as np
 import pandas as pd
 
 from evadb.functions.abstract.abstract_function import AbstractFunction
@@ -39,6 +42,8 @@ def setup(
         horizon: int,
         library: str,
         conf: int,
+        last_ds: list,
+        last_y: list,
     ):
         self.library = library
         if "neuralforecast" in self.library:
@@ -67,6 +72,8 @@ def setup(
                 self.rmse = float(f.readline())
                 if "arima" in model_name.lower():
                     self.hypers = "p,d,q: " + f.readline()
+        self.last_ds = last_ds
+        self.last_y = last_y
 
     def forward(self, data) -> pd.DataFrame:
         log_str = ""
@@ -79,7 +86,7 @@ def forward(self, data) -> pd.DataFrame:
 
         # Feedback
         if len(data) == 0 or list(list(data.iloc[0]))[0] is True:
-            # Suggestions
+            ## Suggestions
             suggestion_list = []
             # 1: Flat predictions
             if self.library == "statsforecast":
@@ -95,12 +102,69 @@ def forward(self, data) -> pd.DataFrame:
             for suggestion in set(suggestion_list):
                 log_str += "\nSUGGESTION: " + self.suggestion_dict[suggestion]
 
-            # Metrics
+            ## Metrics
             if self.rmse is not None:
                 log_str += "\nMean normalized RMSE: " + str(self.rmse)
             if self.hypers is not None:
                 log_str += "\nHyperparameters: " + self.hypers
 
+            ## Plot figure
+
+            pred_plt = self.last_y + list(
+                forecast_df[
+                    self.model_name
+                    if self.library == "statsforecast"
+                    else self.model_name + "-median"
+                ]
+            )
+            pred_plt_lo = self.last_y + list(
+                forecast_df[self.model_name + "-lo-" + str(self.conf)]
+            )
+            pred_plt_hi = self.last_y + list(
+                forecast_df[self.model_name + "-hi-" + str(self.conf)]
+            )
+
+            plt.plot(pred_plt, label="Prediction")
+            plt.fill_between(
+                x=range(len(pred_plt)), y1=pred_plt_lo, y2=pred_plt_hi, alpha=0.3
+            )
+            plt.plot(self.last_y, label="Actual")
+            plt.xlabel("Time")
+            plt.ylabel("Value")
+            xtick_strs = self.last_ds + list(forecast_df["ds"])
+            num_to_keep_args = list(
+                range(0, len(xtick_strs), int((len(xtick_strs) - 2) / 8))
+            ) + [len(xtick_strs) - 1]
+            xtick_strs = [
+                x if i in num_to_keep_args else "" for i, x in enumerate(xtick_strs)
+            ]
+            plt.xticks(range(len(pred_plt)), xtick_strs, rotation=85)
+            plt.legend()
+            plt.tight_layout()
+
+            # convert plt figure to opencv, inspired from https://copyprogramming.com/howto/convert-matplotlib-figure-to-cv2-image-a-complete-guide-with-examples#converting-matplotlib-figure-to-cv2-image
+            # convert figure to canvas
+            canvas = plt.get_current_fig_manager().canvas
+
+            # render the canvas
+            canvas.draw()
+
+            # convert canvas to image
+            img = np.fromstring(canvas.tostring_rgb(), dtype="uint8")
+            img = img.reshape(canvas.get_width_height()[::-1] + (3,))
+
+            # convert image to cv2 format
+            cv2_img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
+
+            # Convert to bytes
+            _, buffer = cv2.imencode(".jpg", cv2_img)
+            img_bytes = buffer.tobytes()
+
+            # Add to dataframe as a plot
+            forecast_df["plot"] = [img_bytes] + [None] * (len(forecast_df) - 1)
+
+            log_str += "\nA plot has been saved in the 'plot' column of the output table. It maybe rendered using the cv2.imdecode function."
+
             print(log_str)
 
         forecast_df = forecast_df.rename(

diff --git a/evadb/models/storage/batch.py b/evadb/models/storage/batch.py
@@ -235,15 +235,17 @@ def update_indices(self, indices: List, other: Batch):
     def file_paths(self) -> Iterable:
         yield from self._frames["file_path"]
 
-    def project(self, cols: None) -> Batch:
+    def project(self, cols: None, forecast: bool = False) -> Batch:
         """
         Takes as input the column list, returns the projection.
         We do a copy for now.
         """
         cols = cols or []
         verified_cols = [c for c in cols if c in self._frames]
         unknown_cols = list(set(cols) - set(verified_cols))
-        assert len(unknown_cols) == 0, unknown_cols
+        assert len(unknown_cols) == 0 or (
+            forecast is True and unknown_cols == ["plot"]
+        ), unknown_cols
         return Batch(self._frames[verified_cols])
 
     @classmethod
@@ -405,14 +407,20 @@ def reset_index(self):
         """Resets the index of the data frame in the batch"""
         self._frames.reset_index(drop=True, inplace=True)
 
-    def modify_column_alias(self, alias: Union[Alias, str]) -> None:
+    def modify_column_alias(
+        self, alias: Union[Alias, str], forecast: bool = False
+    ) -> None:
         # a, b, c -> table1.a, table1.b, table1.c
         # t1.a -> t2.a
         if isinstance(alias, str):
             alias = Alias(alias)
         new_col_names = []
         if len(alias.col_names):
-            if len(self.columns) != len(alias.col_names):
+            if (len(self.columns) != len(alias.col_names) and forecast is False) or (
+                forecast is True
+                and (self.columns != alias.col_names)
+                and list(set(alias.col_names) - set(self.columns)) != ["plot"]
+            ):
                 err_msg = (
                     f"Expected {len(alias.col_names)} columns {alias.col_names},"
                     f"got {len(self.columns)} columns {self.columns}."
@@ -431,7 +439,10 @@ def modify_column_alias(self, alias: Union[Alias, str]) -> None:
                 else:
                     new_col_names.append("{}.{}".format(alias.alias_name, col_name))
 
-        self._frames.columns = new_col_names
+        if forecast and list(set(alias.col_names) - set(self.columns)) == ["plot"]:
+            self._frames.columns = new_col_names[:-1]
+        else:
+            self._frames.columns = new_col_names
 
     def drop_column_alias(self) -> None:
         # table1.a, table1.b, table1.c -> a, b, c

diff --git a/test/integration_tests/long/test_model_forecasting.py b/test/integration_tests/long/test_model_forecasting.py
@@ -101,6 +101,7 @@ def test_forecast(self):
                 "airforecast.y",
                 "airforecast.y-lo",
                 "airforecast.y-hi",
+                "airforecast.plot",
             ],
         )
 
@@ -122,7 +123,7 @@ def test_forecast_neuralforecast(self):
         execute_query_fetch_all(self.evadb, create_predict_udf)
 
         predict_query = """
-            SELECT AirPanelForecast() order by y;
+            SELECT AirPanelForecast(FALSE) order by y;
         """
         result = execute_query_fetch_all(self.evadb, predict_query)
         self.assertEqual(len(result), 24)
@@ -167,6 +168,7 @@ def test_forecast_with_column_rename(self):
                 "homeforecast.ma",
                 "homeforecast.ma-lo",
                 "homeforecast.ma-hi",
+                "homeforecast.plot",
             ],
         )
 

diff --git a/test/unit_tests/binder/test_statement_binder.py b/test/unit_tests/binder/test_statement_binder.py
@@ -485,6 +485,11 @@ def test_bind_create_function_should_bind_forecast_with_default_columns(self):
                 type=ColumnType.FLOAT,
                 array_type=None,
             )
+            plot_col_obj = ColumnCatalogEntry(
+                name="plot",
+                type=ColumnType.ANY,
+                array_type=None,
+            )
             create_function_statement.query.target_list = [
                 TupleValueExpression(
                     name=id_col_obj.name, table_alias="a", col_object=id_col_obj
@@ -522,6 +527,7 @@ def test_bind_create_function_should_bind_forecast_with_default_columns(self):
                         y_col_obj,
                         y_lo_col_obj,
                         y_hi_col_obj,
+                        plot_col_obj,
                     )
                 ]
             )
@@ -560,6 +566,11 @@ def test_bind_create_function_should_bind_forecast_with_renaming_columns(self):
                 type=ColumnType.FLOAT,
                 array_type=None,
             )
+            plot_col_obj = ColumnCatalogEntry(
+                name="plot",
+                type=ColumnType.ANY,
+                array_type=None,
+            )
             create_function_statement.query.target_list = [
                 TupleValueExpression(
                     name=id_col_obj.name, table_alias="a", col_object=id_col_obj
@@ -601,6 +612,7 @@ def test_bind_create_function_should_bind_forecast_with_renaming_columns(self):
                         y_col_obj,
                         y_lo_col_obj,
                         y_hi_col_obj,
+                        plot_col_obj,
                     )
                 ]
             )