
Commit d3185c0

Author: Rui Li
add jupyter notebook file for plots; readme updates; etc.
1 parent 8fb458c commit d3185c0

5 files changed: +510 −17 lines

README.md (+29 −4)
@@ -3,17 +3,40 @@ Simulation scripts for the mobility management of UAV base stations project main
 
 # Requirements
 * python2.7
-* numpy
+* numpy==1.16.2
 * tensorflow
 * IPython
 * matplotlib
 
 # Files
-* main.py
-* channel.py
+* main.py
+** main simulation script with an implementation of A3C (V. Mnih et al. 2016. Asynchronous methods for deep reinforcement learning. In ICML. 1928–1937.)
+** uses multi-threading to initialise parallel training of multiple workers in parallel spaces (MobiEnvironments)
+** each worker creates a MobiEnvironment instance and starts training in this environment
+** there is one pair of global AC nets and one pair of local AC nets per worker. Workers train their own nets individually and push their gradients to the global nets periodically; the global nets then apply the gradients uploaded by all workers and distribute the same updated parameters back to every worker
+** both CNN and MLP architectures are implemented. The default MLP nets perform as well as the CNN used in prior work, with lower training complexity
+
 * mobile_env.py
+** follows OpenAI's gym implementation structure for a wireless mobile environment
+** creates an LTE wireless channel which provides computation of SINR values and handover functionality
+** step() and step_test() take an action from the RL agent and return the updated state, reward, and customisable information. Please take care to keep the two functions consistent. Having separate functions for training and testing is not ideal, but it allows different user mobility models while keeping both training and testing steps computationally cheap (rather than switching between if conditions at every step)
+** during training the users move following the group reference model
+** during testing the users move along a preloaded trace (ue_trace_10k.npy) generated from the group reference model
+** the reward function currently consists of a reward on the mean SINR value and a penalty on the number of users in outage, and is open for improvement
+
+* channel.py
+** downlink and uplink SINR
+** in the WAIN work only the downlink SINR is used
+
+* ue_mobility.py
+** a couple of mobility models for UE movement
+** the group reference model (X. Hong et al. 1999. A group mobility model for ad hoc wireless networks. In ACM MSWiM. 53–60.) is used in the WAIN paper; please check the WAIN paper for more details
 
 * main_test.py
+** loads a trained model for testing (taking the input AC model from ./train/Global_A_PARA%.npz where % can be the training step, 2000 by default)
+** testing is done on a controlled UE mobility trace loaded from ./ue_trace_10k.npy
+** at each test step, the output of the NN is argmax-ed to make control decisions for the UAV movements
+** per-step reward, SINR, and computation time are recorded for performance evaluation (output to ./test)
 
 # Build virtual environment
 ` virtualenv env `
@@ -23,4 +46,6 @@ Simulation scripts for the mobility management of UAV base stations project main
 ` mkdir train `
 ` python main.py `
 
-
+# Run testing
+` mkdir test `
+` python main_test.py `
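
The main.py description in the README above (global AC nets plus per-worker local nets, with periodic gradient pushes and parameter pulls) can be illustrated with a minimal sketch of that synchronisation pattern. The toy numpy "net", update rule, and placeholder gradient below are assumptions for illustration only, not the repository's TensorFlow implementation:

```python
import threading
import numpy as np

# Toy stand-in for the global AC net parameters; the real nets live in main.py.
GLOBAL_PARAMS = np.zeros(4)
LOCK = threading.Lock()
LEARNING_RATE = 0.01

def worker(worker_id, n_updates=100):
    # each worker keeps local parameters, mirroring its own local AC nets
    local_params = GLOBAL_PARAMS.copy()
    rng = np.random.RandomState(worker_id)
    for _ in range(n_updates):
        # placeholder gradient; in the project this would come from the worker's
        # own MobiEnvironment rollout and the A3C loss
        grad = rng.randn(4)
        with LOCK:
            GLOBAL_PARAMS[:] -= LEARNING_RATE * grad  # push gradients to the global nets
            local_params = GLOBAL_PARAMS.copy()       # pull the shared update back down

threads = [threading.Thread(target=worker, args=(i,)) for i in range(4)]
for t in threads:
    t.start()
for t in threads:
    t.join()
print("global params after training:", GLOBAL_PARAMS)
```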

coverage_plot.ipynb (+463)

Large diffs are not rendered by default.
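
Since the notebook is not rendered here, a rough, hypothetical sketch of what a plotting notebook could do with the per-step SINR logs that main_test.py writes under ./test; the file name test/2000_sinr.npy and the array shape are assumptions, not confirmed by this commit:

```python
import numpy as np
import matplotlib.pyplot as plt

# Assumed output path following main_test.py's prefix "test/" + "2000" + "_"
sinr = np.load("test/2000_sinr.npy")

# plot per-step mean SINR; if the array is 2D it is assumed to be (steps, users)
series = sinr if sinr.ndim == 1 else sinr.mean(axis=1)
plt.plot(series)
plt.xlabel("test step")
plt.ylabel("mean SINR")
plt.show()
```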

gradient.py (+2 −2)

@@ -6,7 +6,7 @@
 from itertools import product
 
 FILE_NAME_APPEND = ""
-OUTPUT_DIR = "test/"
+OUTPUT_DIR = "gradient/"
 OUTPUT_FILE_NAME = OUTPUT_DIR + "reward" + FILE_NAME_APPEND
 N_BS = 4
 
@@ -39,7 +39,7 @@ def Choose_Act_Gradient(actual_env, s, n_step):
 def Run_Test(reward_file_name):
     MAX_STEP = 10000
     #if reading mobility trace from file
-    test_env = MobiEnvironment(N_BS, 40, 100, "read_trace", "../ue_trace_10k.npy")
+    test_env = MobiEnvironment(N_BS, 40, 100, "read_trace", "./ue_trace_10k.npy")
 
     s = np.array([np.ravel(test_env.reset())])
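
This commit points gradient.py's OUTPUT_DIR at "gradient/", a directory the README's setup steps (`mkdir train`, `mkdir test`) do not create. A minimal sketch, assuming the script does not create the directory itself, of ensuring it exists before the reward files are saved:

```python
import os

OUTPUT_DIR = "gradient/"  # new value set by this commit

# create the output directory up front so saving "gradient/reward..." does not
# fail; os.makedirs without exist_ok keeps this Python 2.7 compatible
if not os.path.isdir(OUTPUT_DIR):
    os.makedirs(OUTPUT_DIR)
```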

main_test.py (+15 −10)

@@ -2,9 +2,12 @@
 TEST_ALGO = "A3C"
 
 FILE_NAME_APPEND = "2000"
-OUTPUT_FILE_NAME = "test/" + FILE_NAME_APPEND
+OUTPUT_FILE_NAME = "test/" + FILE_NAME_APPEND + '_'
 
 def Load_AC_Net():
+    """
+    Load pre-trained A3C model for testing
+    """
     file_name = "train/Global_A_PARA" + FILE_NAME_APPEND +".npz"
     files = np.load(file_name)
 
@@ -18,6 +21,9 @@ def Load_AC_Net():
     return G_AC_TEST
 
 def Load_DPPO_Net():
+    """
+    Load pre-trained DPPO model for testing
+    """
 
     file_name = "test/PI_PARA" + FILE_NAME_APPEND +".npz"
     files = np.load(file_name)
@@ -32,13 +38,13 @@ def Load_DPPO_Net():
     return G_PPO_TEST
 
 def Run_Test(g_test_net, reward_file_name):
+    # maximum number of test steps
     MAX_STEP = 10000
-    #if reading mobility trace from file
+
+    # reading mobility trace from file
     test_env = MobiEnvironment(N_BS, 40, 100, "read_trace", "./ue_trace_10k.npy")
-    #if producing mobility trace
-    # test_env = MobiEnvironment(N_BS, 40, 100, "group")
-    # test_env.plot_sinr_map()
 
+    # reset states
     s = np.array([np.ravel(test_env.reset())])
 
     done = False
@@ -70,10 +76,10 @@ def Run_Test(g_test_net, reward_file_name):
        np.save(reward_file_name + "time", time_all)
        # np.save("ue_trace_10k", ue_walk_trace)
 
-        if step % 5 == 0:
-            np.save(reward_file_name +"ue_loc" + str(step), test_env.ueLoc)
-            np.save(reward_file_name +"sinr_map" + str(step), test_env.sinr_map)
-            np.save(reward_file_name +"assoc_sinr" + str(step), test_env.assoc_sinr)
+        #if step % 5 == 0:
+            #np.save(reward_file_name +"ue_loc" + str(step), test_env.ueLoc)
+            #np.save(reward_file_name +"sinr_map" + str(step), test_env.sinr_map)
+            #np.save(reward_file_name +"assoc_sinr" + str(step), test_env.assoc_sinr)
 
        # reset the environment every 2000 steps
        if step % 2000 == 0:
            s = np.array([np.ravel(test_env.reset())])
@@ -88,7 +94,6 @@ def Run_Test(g_test_net, reward_file_name):
    np.save(reward_file_name + "reward", reward_buf)
    np.save(reward_file_name + "sinr",sinr_all)
    np.save(reward_file_name + "time", time_all)
-    # print np.shape(ue_walk_trace)
    # np.save("ue_trace_10k", ue_walk_trace)
 
 if __name__ == "__main__":
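
Two small points from this diff are worth illustrating: the trailing underscore added to OUTPUT_FILE_NAME makes output paths compose as e.g. test/2000_reward.npy, and the README states that at each test step the network output is argmax-ed into a control action. A minimal sketch with illustrative values only (the choose_action helper is hypothetical, not the repository's function):

```python
import numpy as np

FILE_NAME_APPEND = "2000"
OUTPUT_FILE_NAME = "test/" + FILE_NAME_APPEND + '_'  # as set by this commit

def choose_action(actor_output):
    # greedy test-time rule described in the README: argmax over the NN output
    return int(np.argmax(actor_output))

print(OUTPUT_FILE_NAME + "reward")                    # -> test/2000_reward
print(choose_action(np.array([0.1, 0.6, 0.2, 0.1])))  # -> 1
```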

ue_mobility.py (+1 −1)

@@ -1,4 +1,4 @@
-# Python code for 2D random walk.
+# Python code for 2D random walk, fixed direction, and group reference point mobility models.
 import numpy as np
 import pylab
 import random
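
The updated header lists three mobility models (random walk, fixed direction, group reference point). As a flavour of the simplest of these, a small self-contained 2D random-walk sketch; this is illustrative only and not ue_mobility.py's implementation:

```python
import numpy as np

def random_walk_2d(n_steps, step_size=1, seed=0):
    """Return an (n_steps + 1, 2) array of positions for an axis-aligned 2D random walk."""
    rng = np.random.RandomState(seed)
    # pick one of four axis-aligned directions at every step
    moves = np.array([[1, 0], [-1, 0], [0, 1], [0, -1]]) * step_size
    steps = moves[rng.randint(0, 4, size=n_steps)]
    # prepend the origin, then accumulate the steps into positions
    return np.vstack([[0, 0], np.cumsum(steps, axis=0)])

trace = random_walk_2d(10)
print(trace.shape)  # (11, 2): start position plus 10 steps
```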
