diff --git a/demo.py b/demo.py index 1ac7ef9..8f9cda9 100644 --- a/demo.py +++ b/demo.py @@ -77,7 +77,7 @@ def load(grid_size): sns.heatmap(r, cmap='viridis') plt.show() r_vector = r.reshape(n_states) # convert 2D reward matrix to a 1D vector -value_vector = model.find_optimal_value(r_vector, 0.005) +value_vector = model.find_optimal_value(r_vector, 0.1) policy = model.find_stochastic_policy(value_vector, r_vector) past_traj_len = past_traj.shape[0] svf_vector = model.find_svf_demo(policy, past_traj_len) diff --git a/loader/__init__.pyc b/loader/__init__.pyc deleted file mode 100644 index 4552746..0000000 Binary files a/loader/__init__.pyc and /dev/null differ diff --git a/loader/__pycache__/__init__.cpython-36.pyc b/loader/__pycache__/__init__.cpython-36.pyc deleted file mode 100644 index 84d2cf7..0000000 Binary files a/loader/__pycache__/__init__.cpython-36.pyc and /dev/null differ diff --git a/loader/__pycache__/offroad_loader.cpython-36.pyc b/loader/__pycache__/offroad_loader.cpython-36.pyc deleted file mode 100644 index c7c0601..0000000 Binary files a/loader/__pycache__/offroad_loader.cpython-36.pyc and /dev/null differ diff --git a/loader/__pycache__/util.cpython-36.pyc b/loader/__pycache__/util.cpython-36.pyc deleted file mode 100644 index 888b02f..0000000 Binary files a/loader/__pycache__/util.cpython-36.pyc and /dev/null differ diff --git a/loader/offroad_loader.pyc b/loader/offroad_loader.pyc deleted file mode 100644 index 3916929..0000000 Binary files a/loader/offroad_loader.pyc and /dev/null differ diff --git a/mdp/__init__.pyc b/mdp/__init__.pyc deleted file mode 100644 index bc191cc..0000000 Binary files a/mdp/__init__.pyc and /dev/null differ diff --git a/mdp/__pycache__/__init__.cpython-36.pyc b/mdp/__pycache__/__init__.cpython-36.pyc deleted file mode 100644 index 865d59c..0000000 Binary files a/mdp/__pycache__/__init__.cpython-36.pyc and /dev/null differ diff --git a/mdp/__pycache__/offroad_grid.cpython-36.pyc b/mdp/__pycache__/offroad_grid.cpython-36.pyc deleted file mode 100644 index bd0c191..0000000 Binary files a/mdp/__pycache__/offroad_grid.cpython-36.pyc and /dev/null differ diff --git a/mdp/offroad_grid.py b/mdp/offroad_grid.py index ab5ec0e..e8db667 100644 --- a/mdp/offroad_grid.py +++ b/mdp/offroad_grid.py @@ -229,6 +229,7 @@ def find_optimal_value(self, reward, thresh=0.005): :param reward: numpy array (n_states) :return: """ + start = time.clock() value = np.zeros(self.n_states) step = 0 import warnings @@ -239,8 +240,7 @@ def find_optimal_value(self, reward, thresh=0.005): for s in range(self.n_states): next_s_list = [self.transit_table[s, a] for a in range(self.n_actions)] - r_list = [reward[s] + self.discount * value[ss] for ss in next_s_list] - new_v = max(r_list) + new_v = reward[s] + max([self.discount * value[ss] for ss in next_s_list]) # find the largest update through out the whole sweep over all states max_update = max(max_update, abs(value[s] - new_v)) @@ -250,7 +250,7 @@ def find_optimal_value(self, reward, thresh=0.005): warnings.warn('value iteration does not converge', RuntimeWarning) break - print('find_optimal_value. iteration {}, last update {}'.format(step, max_update)) + print('find_optimal_value. iter {}, last update {:.2f}, took {:.2f}'.format(step, max_update, time.clock()-start)) return value def select_action(self, s, value, epsilon): diff --git a/mdp/offroad_grid.pyc b/mdp/offroad_grid.pyc deleted file mode 100644 index 164bf77..0000000 Binary files a/mdp/offroad_grid.pyc and /dev/null differ diff --git a/network/__init__.pyc b/network/__init__.pyc deleted file mode 100644 index 2adc604..0000000 Binary files a/network/__init__.pyc and /dev/null differ diff --git a/network/__pycache__/__init__.cpython-36.pyc b/network/__pycache__/__init__.cpython-36.pyc deleted file mode 100644 index 22fbd4c..0000000 Binary files a/network/__pycache__/__init__.cpython-36.pyc and /dev/null differ diff --git a/network/__pycache__/hybrid_dilated.cpython-36.pyc b/network/__pycache__/hybrid_dilated.cpython-36.pyc deleted file mode 100644 index f6d2831..0000000 Binary files a/network/__pycache__/hybrid_dilated.cpython-36.pyc and /dev/null differ diff --git a/network/__pycache__/hybrid_fcn.cpython-36.pyc b/network/__pycache__/hybrid_fcn.cpython-36.pyc deleted file mode 100644 index 43a0730..0000000 Binary files a/network/__pycache__/hybrid_fcn.cpython-36.pyc and /dev/null differ diff --git a/network/__pycache__/simple_fcn.cpython-36.pyc b/network/__pycache__/simple_fcn.cpython-36.pyc deleted file mode 100644 index 13b76c2..0000000 Binary files a/network/__pycache__/simple_fcn.cpython-36.pyc and /dev/null differ diff --git a/network/__pycache__/simple_nn.cpython-36.pyc b/network/__pycache__/simple_nn.cpython-36.pyc deleted file mode 100644 index e3f3b66..0000000 Binary files a/network/__pycache__/simple_nn.cpython-36.pyc and /dev/null differ diff --git a/network/hybrid_fcn.pyc b/network/hybrid_fcn.pyc deleted file mode 100644 index d8c2173..0000000 Binary files a/network/hybrid_fcn.pyc and /dev/null differ