gsurma · black-square · Oct 21, 2022
diff --git a/cartpole.py b/cartpole.py
@@ -34,7 +34,7 @@ def __init__(self, observation_space, action_space):
         self.model.add(Dense(24, input_shape=(observation_space,), activation="relu"))
         self.model.add(Dense(24, activation="relu"))
         self.model.add(Dense(self.action_space, activation="linear"))
-        self.model.compile(loss="mse", optimizer=Adam(lr=LEARNING_RATE))
+        self.model.compile(loss="mse", optimizer=Adam(learning_rate=LEARNING_RATE))
 
     def remember(self, state, action, reward, next_state, done):
         self.memory.append((state, action, reward, next_state, done))
@@ -61,28 +61,28 @@ def experience_replay(self):
 
 
 def cartpole():
-    env = gym.make(ENV_NAME)
+    env = gym.make(ENV_NAME)  # pass render_mode="human" to gym.make to watch the agent
     score_logger = ScoreLogger(ENV_NAME)
     observation_space = env.observation_space.shape[0]
     action_space = env.action_space.n
     dqn_solver = DQNSolver(observation_space, action_space)
     run = 0
     while True:
         run += 1
-        state = env.reset()
+        state, info = env.reset()  # reset() now returns (observation, info)
         state = np.reshape(state, [1, observation_space])
         step = 0
         while True:
             step += 1
             #env.render()
             action = dqn_solver.act(state)
-            state_next, reward, terminal, info = env.step(action)
+            state_next, reward, terminal, truncated, info = env.step(action)
             reward = reward if not terminal else -reward
             state_next = np.reshape(state_next, [1, observation_space])
             dqn_solver.remember(state, action, reward, state_next, terminal)
             state = state_next
-            if terminal:
-                print "Run: " + str(run) + ", exploration: " + str(dqn_solver.exploration_rate) + ", score: " + str(step)
+            if terminal or truncated:  # a truncated episode must also end and be reset
+                print("Run: " + str(run) + ", exploration: " + str(dqn_solver.exploration_rate) + ", score: " + str(step))
                 score_logger.add_score(step, run)
                 break
             dqn_solver.experience_replay()

diff --git a/scores/score_logger.py b/scores/score_logger.py
@@ -38,10 +38,10 @@ def add_score(self, score, run):
                        show_legend=True)
         self.scores.append(score)
         mean_score = mean(self.scores)
-        print "Scores: (min: " + str(min(self.scores)) + ", avg: " + str(mean_score) + ", max: " + str(max(self.scores)) + ")\n"
+        print( "Scores: (min: " + str(min(self.scores)) + ", avg: " + str(mean_score) + ", max: " + str(max(self.scores)) + ")\n" )
         if mean_score >= AVERAGE_SCORE_TO_SOLVE and len(self.scores) >= CONSECUTIVE_RUNS_TO_SOLVE:
             solve_score = run-CONSECUTIVE_RUNS_TO_SOLVE
-            print "Solved in " + str(solve_score) + " runs, " + str(run) + " total runs."
+            print( "Solved in " + str(solve_score) + " runs, " + str(run) + " total runs." )
             self._save_csv(SOLVED_CSV_PATH, solve_score)
             self._save_png(input_path=SOLVED_CSV_PATH,
                            output_path=SOLVED_PNG_PATH,
@@ -58,10 +58,12 @@ def _save_png(self, input_path, output_path, x_label, y_label, average_of_n_last
         y = []
         with open(input_path, "r") as scores:
             reader = csv.reader(scores)
-            data = list(reader)
-            for i in range(0, len(data)):
-                x.append(int(i))
-                y.append(int(data[i][0]))
+            i = 0
+            for row in reader:
+                if row:
+                    x.append(int(i))
+                    y.append(int(row[0]))
+                    i += 1      
 
         plt.subplots()
         plt.plot(x, y, label="score per run")