
Commit d9033a0

Created file for exploit FRMDP, both with and without usage information.
Basic file organization.
1 parent 85f3e56 commit d9033a0

29 files changed: +1271 -28 lines

project/exploit.py

+697
Large diffs are not rendered by default.
Binary file not shown.

project/frmdp.py

+62 -26
@@ -14,11 +14,12 @@ def __init__(self, states=None, actions=None, termination_rewards=None, transiti
                  horizon=None):
         """
         :param list states: the valid states in the model.
+
         :param list actions: the list of actions in the model.
         :param list termination_rewards: reward for ending in each state.
         :param ndarray transitions: maps state, action, next state to their probability. transitions[state][action][next_state] = P(next_state | state, action)
         :param int horizon: number of rounds in the decision process.
-        """
+        """
         # equivalent to S in paper.
         self.s = states
         # equivalent to A in paper.
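The transitions array documented above is a dense (state, action, next_state) tensor, so every row transitions[s][a] is a full probability distribution over next states. A minimal sketch of building and sanity-checking such an array; the two-state, two-action numbers below are hypothetical and not part of the commit:

import numpy as np

# Hypothetical 2-state, 2-action model.
n_states, n_actions = 2, 2
transitions = np.zeros((n_states, n_actions, n_states))
transitions[0][0] = [0.9, 0.1]   # P(next_state | state=0, action=0)
transitions[0][1] = [0.2, 0.8]
transitions[1][0] = [0.1, 0.9]
transitions[1][1] = [0.8, 0.2]

# Every (state, action) slice should sum to 1, even if many entries are 0.
assert np.allclose(transitions.sum(axis=2), 1.0)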
@@ -43,12 +44,13 @@ def __init__(self, states=None, actions=None, termination_rewards=None, transiti
         #
         #
         # This implies each action has a full list of states for next-states and many may
-        # simply have 0 probability. Thus a dot product w/ self.values will perform the
+        # simply have 0 probability. Thus a dot product w/ self.values will perform the
         # full P(s') * val(s') operation.
         self.t = transitions
         # h in paper.
         self.horizon = horizon

+
     def is_terminal(self, state):
         """
         Checks if MDP is in terminal state.
@@ -57,6 +59,7 @@ def is_terminal(self, state):
         """
         raise NotImplementedError()

+
     def get_probs(self, state, action):
         """
         Returns the list of transition probabilities.
@@ -65,7 +68,9 @@ def get_probs(self, state, action):
         :return the vector of probabilities for each next state given an action and state, or a matrix of probabilities for each action.
         :rtype ndarray.
         """
-        return self.t[state][action][:]
+        probs = self.t[state][action][:]
+        return probs
+

     def get_reward(self, state, action, state_reward):
         """
@@ -77,13 +82,15 @@ def get_reward(self, state, action, state_reward):
         """
         raise NotImplementedError()

+
     def termination_reward(self, state):
         """
         Checks if MDP is in terminal state.
         :param int state: The index of state to check if is terminal.
         :return termination reward for this state.
         """
         return self.g[state]
+

     def take_action(self, state, action):
         """
@@ -118,12 +125,13 @@ def value_iteration(self):
                     continue
                 # get value for each action in state.
                 for a, action in enumerate(self.a):
-                    options[s][a] = np.dot(self.get_probs(s, a),
-                                           self.get_reward(s, a, delta[t - 1]))
+                    val = np.dot(self.get_probs(s, a),
+                                 self.get_reward(s, a, delta[t - 1]))
+                    options[s][a] = val
                 # set optimal policy and value.
                 policy[s] = np.argmax(options[s])
                 delta[t][s] = options[s][policy[s]]
-
+
         return delta[self.horizon], policy

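The loop above implements the finite-horizon backup delta[t][s] = max over a of dot(P(.|s,a), R(s, a, delta[t-1])); policy_iteration in the next hunk evaluates the same recursion with the action fixed by the given policy. A standalone sketch of that backup on a hypothetical two-state problem (the reward model below simply adds a landing reward to the carried-over value, much as the MDP subclass does):

import numpy as np

# Hypothetical toy problem: 2 states, 2 actions, horizon 3.
transitions = np.array([[[0.9, 0.1], [0.2, 0.8]],
                        [[0.1, 0.9], [0.8, 0.2]]])   # transitions[s][a][s2]
landing_reward = np.array([0.0, 1.0])                # reward for ending up in each next state.

def backup(values):
    # One round of the recursion used in value_iteration().
    new_values = np.zeros(2)
    policy = np.zeros(2, dtype=int)
    for s in range(2):
        options = [np.dot(transitions[s][a], landing_reward + values) for a in range(2)]
        policy[s] = int(np.argmax(options))
        new_values[s] = options[policy[s]]
    return new_values, policy

values = np.zeros(2)          # stands in for the termination rewards, delta[0].
for _ in range(3):            # horizon of 3 rounds.
    values, policy = backup(values)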
@@ -149,7 +157,7 @@ def policy_iteration(self, policy):
                 a = policy[s]
                 delta[t][s] = np.dot(self.get_probs(s, a),
                                      self.get_reward(s, a, delta[t - 1]))
-
+
         return delta[self.horizon]

@@ -192,13 +200,13 @@ def simulate(self, start_state, policy):
                 break

         reward += self.termination_reward(state)
-        return reward, history
+        return reward, history



 class MDP(FRMDP):
     """
-    FRMDP implementation of a basic MDP. This has been validated by comparing to a known
+    FRMDP implementation of a basic MDP. This has been validated by comparing to a known
     correct MDP implementation.
     """

@@ -231,9 +239,9 @@ def get_reward(self, state, action, state_reward):

 class BasicCoffeeRobot(MDP):
     """
-    Proof of concept to show this produces the same output as the standard MDP.
+    MDP for value iteration including spills cost.
     """
-
+
     def __init__(self):
         states = [(a,b) for a in range(1,4) for b in range(1,4)] # (1,1) ... (3,3)
         actions = ['up', 'right', 'down', 'left']
@@ -243,7 +251,20 @@ def __init__(self):
         horizon = 10

         super().__init__(states, actions, termination, transitions, horizon, rewards)
-
+
+
+    def get_reward(self, state, action, state_reward):
+        """
+        Gets reward for transition from state->action->nextState.
+        :param int state: The current state id.
+        :param int action: The current action id.
+        :param ndarray state_reward: The vector of rewards from the previous iteration of this state, action pair.
+        :return vector of rewards for each next_state.
+        """
+        # to_subtract = 0.1 * (np.full(len(self.s), -1 * state_reward))
+        p_spill = 0.03
+        return (1 - p_spill) * (self.rewards[state][action] + (self.discount * state_reward)) + p_spill * (np.full(len(self.s), -1))
+
     def is_terminal(self, state):
         """
         Checks if this state is a terminal state.
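The new get_reward above folds a fixed spill penalty into the usual MDP backup: with probability p_spill the robot spills and receives -1, otherwise it gets the immediate reward plus the discounted carried-over value. A quick numeric check of that expectation; the reward, discount, and value figures below are made up for illustration:

p_spill = 0.03
immediate = 2.0    # hypothetical entry of self.rewards[state][action]
discount = 0.9     # hypothetical self.discount
future = 5.0       # hypothetical entry of state_reward (delta[t-1])

expected = (1 - p_spill) * (immediate + discount * future) + p_spill * (-1)
# 0.97 * 6.5 + 0.03 * (-1) = 6.305 - 0.03 = 6.275
print(round(expected, 3))    # 6.275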
@@ -260,7 +281,7 @@ class CoffeeRobot(FRMDP):
     """
     Coffee Robot example from the paper. A robot on a 3x3 grid needs to deliver coffee
     to the space (3,3). The robot can move in any of four directions, but may end up
-    to the left of the intended space due to a fault in the code. The cost of spilling
+    to the left of the intended space due to a fault in the code. The cost of spilling
     in a space is:
                   (3,3,)
     +----+----+----+
@@ -302,7 +323,7 @@ def __init__(self):
         self.spill_cost = np.array([-1,-2,-1, -3,-3,-4, -1,-2,0])
         self.spill_prob = np.array([0.03,0.05,0.03, 0.02,0.04,0.02, 0.01,0.05,0.03])
         super().__init__(states, actions, termination, transitions, horizon)
-
+
     def get_reward(self, state, action, state_reward):
         """
         Gets reward for transition from state->action->nextState.
@@ -313,7 +334,7 @@ def get_reward(self, state, action, state_reward):
         """
         # P * r + (1-P) * x.
         return self.spill_prob * self.spill_cost + (1 - self.spill_prob) * state_reward
-
+
     def is_terminal(self, state):
         """
         Checks if this state is a terminal state.
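Here the reward is a genuinely functional update applied elementwise over next states: each s' either incurs its own spill cost with its own spill probability, or keeps the value accumulated so far in state_reward[s']. A small check using the spill_cost and spill_prob arrays from the diff; the state_reward vector is hypothetical:

import numpy as np

spill_cost = np.array([-1, -2, -1, -3, -3, -4, -1, -2, 0])
spill_prob = np.array([0.03, 0.05, 0.03, 0.02, 0.04, 0.02, 0.01, 0.05, 0.03])
state_reward = np.full(9, 10.0)   # hypothetical values carried in from the previous round.

# Elementwise: P(spill at s') * cost(s') + (1 - P(spill at s')) * value(s').
updated = spill_prob * spill_cost + (1 - spill_prob) * state_reward
# First cell: 0.03 * (-1) + 0.97 * 10 = 9.67
print(round(float(updated[0]), 2))   # 9.67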
@@ -326,14 +347,29 @@ def is_terminal(self, state):



-coffee = BasicCoffeeRobot()
-opt_val1, opt_policy1 = coffee.value_iteration()
-test_val1 = coffee.policy_iteration(opt_policy1)
-assert(np.array_equal(opt_val1, test_val1))
-coffee2 = CoffeeRobot()
-opt_val2, opt_policy2 = coffee2.value_iteration()
-assert(np.array_equal(opt_val1, opt_val2))
-assert(np.array_equal(opt_policy1, opt_policy2))
-test_val2 = coffee2.policy_iteration(opt_policy2)
-assert(np.array_equal(test_val1, test_val2))
-print("woohoo.")
+def main():
+    # Create basic coffee robot, perform value iteration, and ensure the
+    # values match the policy iteration of that optimal policy.
+    coffee = BasicCoffeeRobot()
+    opt_val1, opt_policy1 = coffee.value_iteration()
+    test_val1 = coffee.policy_iteration(opt_policy1)
+    assert(np.array_equal(opt_val1, test_val1))
+    # Create coffee robot, perform value iteration, and ensure the
+    # values match the policy iteration of that optimal policy.
+    coffee2 = CoffeeRobot()
+    opt_val2, opt_policy2 = coffee2.value_iteration()
+    test_val2 = coffee2.policy_iteration(opt_policy2)
+    assert(np.array_equal(opt_val2, test_val2))
+    # Assert they produce different policies and value outputs, as one would expect.
+    assert(not np.array_equal(opt_val1, opt_val2))
+    assert(not np.array_equal(opt_policy1, opt_policy2))
+    # Display derived policies.
+    print("\nMDP Coffee Robot Policy.")
+    coffee.describe_policy(opt_policy1)
+    print("-"*50) # separator.
+    print("\nFRMDP Coffee Robot Policy.")
+    coffee.describe_policy(opt_policy2)
+
+
+if __name__ == "__main__":
+    main()
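With this change frmdp.py doubles as a small demo: running it builds both robots, cross-checks value iteration against policy iteration, and prints the two derived policies. A hedged sketch of reusing the classes from other code; it assumes frmdp.py is importable from the working directory, and the start state passed to simulate() is only a guess at the expected indexing:

from frmdp import BasicCoffeeRobot, CoffeeRobot   # assumes frmdp.py is on the import path.

robot = CoffeeRobot()
values, policy = robot.value_iteration()
robot.describe_policy(policy)                 # print the policy in readable form.
reward, history = robot.simulate(0, policy)   # start state argument assumed to be a state index.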

project/papers/Not Used/abs-03.pdf

952 KB
Binary file not shown.

project/papers/Not Used/cavusoglu.pdf

213 KB
Binary file not shown.
289 KB
Binary file not shown.
187 KB
Binary file not shown.
1.64 MB
Binary file not shown.
1.23 MB
Binary file not shown.
File renamed without changes.
File renamed without changes.
Binary file not shown.

project/papers/citations.txt

+9
@@ -0,0 +1,9 @@
+1. D. P. Duggan and R. C. Parks, "Principles of Cyberwarfare," in IEEE Security & Privacy, vol. 9, no. , pp. 30-35, 2011. doi:10.1109/MSP.2011.138
+2. D. Elliott, "Deterring Strategic Cyberattack," in IEEE Security & Privacy, vol. 9, no. 5, pp. 36-40, Sept.-Oct. 2011.
+doi: 10.1109/MSP.2011.24
+3. Lye, K. & Wing, J. IJIS (2005) 4: 71. https://doi.org/10.1007/s10207-004-0060-x
+4. Moore, Tyler et al. “Would a 'cyber warrior' protect us: exploring trade-offs between attack and defense of information systems.” NSPW (2010).
+5. Nong Ye, Yebin Zhang and C. M. Borror, "Robustness of the Markov-chain model for cyber-attack detection," in IEEE Transactions on Reliability, vol. 53, no. 1, pp. 116-123, March 2004. doi: 10.1109/TR.2004.823851
+6. S. Roy, C. Ellis, S. Shiva, D. Dasgupta, V. Shandilya and Q. Wu, "A Survey of Game Theory as Applied to Network Security," 2010 43rd Hawaii International Conference on System Sciences, Honolulu, HI, 2010, pp. 1-10. doi: 10.1109/HICSS.2010.35
+7. Spanjaard O., Weng P. (2013) Markov Decision Processes with Functional Rewards. In: Ramanna S., Lingras P., Sombattheera C., Krishna A. (eds) Multi-disciplinary Trends in Artificial Intelligence. MIWAI 2013. Lecture Notes in Computer Science, vol 8271. Springer, Berlin, Heidelberg
+8. T. Bao, Y. Shoshitaishvili, R. Wang, C. Kruegel, G. Vigna and D. Brumley, "How Shall We Play a Game?: A Game-theoretical Model for Cyber-warfare Games," 2017 IEEE 30th Computer Security Foundations Symposium (CSF), Santa Barbara, CA, 2017, pp. 7-21. doi: 10.1109/CSF.2017.34
