Update tradinggym.py

Fix unstable initial portfolio value: in both reset() and evaluate(), the initial step is now selected before the random portfolio split is computed.
astrologos · May 23, 2023 · 7bcf38f · 7bcf38f
1 parent 51eaede
commit 7bcf38f
Showing 1 changed file with 24 additions and 18 deletions.
diff --git a/tradinggym.py b/tradinggym.py
@@ -105,7 +105,16 @@ def reset(self, start_index = None):
         Raises:
             ValueError: If the start index is out of bounds.
         """
+
+        # Randomly initialize start index
+        if start_index is None:
+            self.current_step = np.random.randint(1 + 2*self.window_size, len(self.observations) - self.max_steps - 2*(self.window_size + 1))  # Initialize current step randomly
+        elif start_index > 1 + 2*self.window_size or start_index < len(self.observations) - self.window_size - self.max_steps:
+            raise ValueError('Initial step must be between (1 + 2*window_size, len(observations) - window_size - max_steps). \n' + 
+            'It is not advised to evaluate the model near the bounds of the observation data.')
+        else: self.current_step = start_index
 
+        # Randomly initialize portfolio split
         if self.random_split:
             # Set vals to zero so that portfolio value is calculated solely on shares value,
             # this allows us to make up the difference in the balance so that portfolio value equals initial balance
@@ -120,12 +129,6 @@ def reset(self, start_index = None):
             self.balance = self.initial_value  # Reset balance
             self.shares = 0.0  # Reset shares
 
-        if start_index is None:
-            self.current_step = np.random.randint(1 + 2*self.window_size, len(self.observations) - self.max_steps - 2*(self.window_size + 1))  # Initialize current step randomly
-        elif start_index > 1 + 2*self.window_size or start_index < len(self.observations) - self.window_size - self.max_steps:
-            raise ValueError('Initial step must be between (1 + 2*window_size, len(observations) - window_size - max_steps). \n' + 
-            'It is not advised to evaluate the model near the bounds of the observation data.')
-        else: self.current_step = start_index
         return self._get_observation()  # Return observation
 
     def step(self, action):
@@ -241,8 +244,22 @@ def evaluate(self, frame_length, start_index = None, render=True, model=None, de
 
         # Reset the environment for evaluation
         self.reset()
+
+        # Randomly select a subset of the data for evaluation
+        if start_index is None:
+            self.current_step = np.random.randint(1 + 2*self.window_size, len(self.observations) - self.max_steps - 2*(self.window_size + 1) - frame_length)  # Initialize current step randomly
+        elif start_index < 1 + 2*self.window_size or start_index > len(self.observations) - self.max_steps - 2*(self.window_size + 1) - frame_length:
+            raise ValueError('Initial step must be on the interval (1 + 2*window_size, len(observations) - max_steps - 2*window_size - frame_length. \n)' + 
+            'It is not advised to evaluate the model near the bounds of the observation data.')
+        else: self.current_step = start_index
+        eval_start_index = self.current_step + self.window_size
+        eval_end_index = eval_start_index + frame_length
+        eval_data = self.observations.iloc[eval_start_index:eval_end_index].reset_index(drop=True)
+
+        # Randomly assign initial position split
         t0balance = init_balance
         t0shares = init_shares
+
         if self.random_split:
             # Initialize vals to zero so that portfolio value is calculated solely on shares value,
             # this allows us to make up the different in the balance so that portfolio value equals initial balance
@@ -261,17 +278,6 @@ def evaluate(self, frame_length, start_index = None, render=True, model=None, de
         t0shares = self.shares
 
 
-        # Randomly select a subset of the data for evaluation
-        if start_index is None:
-            self.current_step = np.random.randint(1 + 2*self.window_size, len(self.observations) - self.max_steps - 2*(self.window_size + 1) - frame_length)  # Initialize current step randomly
-        elif start_index < 1 + 2*self.window_size or start_index > len(self.observations) - self.max_steps - 2*(self.window_size + 1) - frame_length:
-            raise ValueError('Initial step must be on the interval (1 + 2*window_size, len(observations) - max_steps - 2*window_size - frame_length. \n)' + 
-            'It is not advised to evaluate the model near the bounds of the observation data.')
-        else: self.current_step = start_index
-        eval_start_index = self.current_step + self.window_size
-        eval_end_index = eval_start_index + frame_length
-        eval_data = self.observations.iloc[eval_start_index:eval_end_index].reset_index(drop=True)
-
         fig = None
 
         # Price subplot
@@ -333,7 +339,7 @@ def evaluate(self, frame_length, start_index = None, render=True, model=None, de
         # Print evaluation metrics
         if verbose>0:
             print("Evaluation Metrics:  ")
-            print('Initial value:       ' + format(portfolio_vals[0]))
+            print('Initial value:       ' + format(portfolio_vals[0],'.2f'))
             print('Initial balance:     ' + format(t0balance, '.2f'))
             print('Initial shares:      ' + format(t0shares, '.2f'))
             print('Initial split:       ' + format(1-split,'.2f'))