
Commit d9033a0

Created file for exploit FRMDP, both with and without usage information.
Basic file organization.
1 parent 85f3e56 commit d9033a0

29 files changed: +1271 -28 lines

project/exploit.py

+697
Large diffs are not rendered by default.
Binary file not shown.

project/frmdp.py

+62 -26
@@ -14,11 +14,12 @@ def __init__(self, states=None, actions=None, termination_rewards=None, transiti
                  horizon=None):
         """
         :param list states: the valid states in the model.
+
         :param list actions: the list of actions in the model.
         :param list termination_rewards: reward for ending in each state.
         :param ndarray transitions: maps state, action, next state to their probability. transitions[state][action][next_state] = P(next_state | state, action)
         :param int horizon: number of rounds in the decision process.
-        """
+        """
         # equivalent to S in paper.
         self.s = states
         # equivalent to A in paper.
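The transitions array documented above is a dense (state, action, next_state) tensor, so every row transitions[s][a] is a full probability distribution over next states. A minimal sketch of building and sanity-checking such an array; the two-state, two-action numbers below are hypothetical and not part of the commit:

import numpy as np

# Hypothetical 2-state, 2-action model.
n_states, n_actions = 2, 2
transitions = np.zeros((n_states, n_actions, n_states))
transitions[0][0] = [0.9, 0.1]   # P(next_state | state=0, action=0)
transitions[0][1] = [0.2, 0.8]
transitions[1][0] = [0.1, 0.9]
transitions[1][1] = [0.8, 0.2]

# Every (state, action) slice should sum to 1, even if many entries are 0.
assert np.allclose(transitions.sum(axis=2), 1.0)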
@@ -43,12 +44,13 @@ def __init__(self, states=None, actions=None, termination_rewards=None, transiti
         #
         #
         # This implies each action has a full list of states for next-states and many may
-        # simply have 0 probability. Thus a dot product w/ self.values will perform the
+        # simply have 0 probability. Thus a dot product w/ self.values will perform the
         # full P(s') * val(s') operation.
         self.t = transitions
         # h in paper.
         self.horizon = horizon

+
     def is_terminal(self, state):
         """
         Checks if MDP is in terminal state.
@@ -57,6 +59,7 @@ def is_terminal(self, state):
         """
         raise NotImplementedError()

+
     def get_probs(self, state, action):
         """
         Returns the list of transition probabilities.
@@ -65,7 +68,9 @@ def get_probs(self, state, action):
         :return the vector of probabilities for each next state given an action and state, or a matrix of probabilities for each action.
         :rtype ndarray.
         """
-        return self.t[state][action][:]
+        probs = self.t[state][action][:]
+        return probs
+

     def get_reward(self, state, action, state_reward):
         """
@@ -77,13 +82,15 @@ def get_reward(self, state, action, state_reward):
         """
         raise NotImplementedError()

+
     def termination_reward(self, state):
         """
         Checks if MDP is in terminal state.
         :param int state: The index of state to check if is terminal.
         :return termination reward for this state.
         """
         return self.g[state]
+

     def take_action(self, state, action):
         """
@@ -118,12 +125,13 @@ def value_iteration(self):
                     continue
                 # get value for each action in state.
                 for a, action in enumerate(self.a):
-                    options[s][a] = np.dot(self.get_probs(s, a),
-                                           self.get_reward(s, a, delta[t - 1]))
+                    val = np.dot(self.get_probs(s, a),
+                                 self.get_reward(s, a, delta[t - 1]))
+                    options[s][a] = val
                 # set optimal policy and value.
                 policy[s] = np.argmax(options[s])
                 delta[t][s] = options[s][policy[s]]
-
+
         return delta[self.horizon], policy

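The loop above implements the finite-horizon backup delta[t][s] = max over a of dot(P(.|s,a), R(s, a, delta[t-1])); policy_iteration in the next hunk evaluates the same recursion with the action fixed by the given policy. A standalone sketch of that backup on a hypothetical two-state problem (the reward model below simply adds a landing reward to the carried-over value, much as the MDP subclass does):

import numpy as np

# Hypothetical toy problem: 2 states, 2 actions, horizon 3.
transitions = np.array([[[0.9, 0.1], [0.2, 0.8]],
                        [[0.1, 0.9], [0.8, 0.2]]])   # transitions[s][a][s2]
landing_reward = np.array([0.0, 1.0])                # reward for ending up in each next state.

def backup(values):
    # One round of the recursion used in value_iteration().
    new_values = np.zeros(2)
    policy = np.zeros(2, dtype=int)
    for s in range(2):
        options = [np.dot(transitions[s][a], landing_reward + values) for a in range(2)]
        policy[s] = int(np.argmax(options))
        new_values[s] = options[policy[s]]
    return new_values, policy

values = np.zeros(2)          # stands in for the termination rewards, delta[0].
for _ in range(3):            # horizon of 3 rounds.
    values, policy = backup(values)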
@@ -149,7 +157,7 @@ def policy_iteration(self, policy):
                 a = policy[s]
                 delta[t][s] = np.dot(self.get_probs(s, a),
                                      self.get_reward(s, a, delta[t - 1]))
-
+
         return delta[self.horizon]

@@ -192,13 +200,13 @@ def simulate(self, start_state, policy):
                 break

         reward += self.termination_reward(state)
-        return reward, history
+        return reward, history



 class MDP(FRMDP):
     """
-    FRMDP implementation of a basic MDP. This has been validated by comparing to a known
+    FRMDP implementation of a basic MDP. This has been validated by comparing to a known
     correct MDP implementation.
     """

@@ -231,9 +239,9 @@ def get_reward(self, state, action, state_reward):

 class BasicCoffeeRobot(MDP):
     """
-    Proof of concept to show this produces the same output as the standard MDP.
+    MDP for value iteration including spills cost.
     """
-
+
     def __init__(self):
         states = [(a,b) for a in range(1,4) for b in range(1,4)] # (1,1) ... (3,3)
         actions = ['up', 'right', 'down', 'left']
@@ -243,7 +251,20 @@ def __init__(self):
         horizon = 10

         super().__init__(states, actions, termination, transitions, horizon, rewards)
-
+
+
+    def get_reward(self, state, action, state_reward):
+        """
+        Gets reward for transition from state->action->nextState.
+        :param int state: The current state id.
+        :param int action: The current action id.
+        :param ndarray state_reward: The vector of rewards from the previous iteration of this state, action pair.
+        :return vector of rewards for each next_state.
+        """
+        # to_subtract = 0.1 * (np.full(len(self.s), -1 * state_reward))
+        p_spill = 0.03
+        return (1 - p_spill) * (self.rewards[state][action] + (self.discount * state_reward)) + p_spill * (np.full(len(self.s), -1))
+
     def is_terminal(self, state):
         """
         Checks if this state is a terminal state.
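The new get_reward above folds a fixed spill penalty into the usual MDP backup: with probability p_spill the robot spills and receives -1, otherwise it gets the immediate reward plus the discounted carried-over value. A quick numeric check of that expectation; the reward, discount, and value figures below are made up for illustration:

p_spill = 0.03
immediate = 2.0    # hypothetical entry of self.rewards[state][action]
discount = 0.9     # hypothetical self.discount
future = 5.0       # hypothetical entry of state_reward (delta[t-1])

expected = (1 - p_spill) * (immediate + discount * future) + p_spill * (-1)
# 0.97 * 6.5 + 0.03 * (-1) = 6.305 - 0.03 = 6.275
print(round(expected, 3))    # 6.275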
@@ -260,7 +281,7 @@ class CoffeeRobot(FRMDP):
     """
     Coffee Robot example from the paper. A robot on a 3x3 grid needs to deliver coffee
     to the space (3,3). The robot can move in any of four directions, but may end up
-    to the left of the intended space due to a fault in the code. The cost of spilling
+    to the left of the intended space due to a fault in the code. The cost of spilling
     in a space is:
                   (3,3,)
     +----+----+----+
@@ -302,7 +323,7 @@ def __init__(self):
         self.spill_cost = np.array([-1,-2,-1, -3,-3,-4, -1,-2,0])
         self.spill_prob = np.array([0.03,0.05,0.03, 0.02,0.04,0.02, 0.01,0.05,0.03])
         super().__init__(states, actions, termination, transitions, horizon)
-
+
     def get_reward(self, state, action, state_reward):
         """
         Gets reward for transition from state->action->nextState.
@@ -313,7 +334,7 @@ def get_reward(self, state, action, state_reward):
         """
         # P * r + (1-P) * x.
         return self.spill_prob * self.spill_cost + (1 - self.spill_prob) * state_reward
-
+
     def is_terminal(self, state):
         """
         Checks if this state is a terminal state.
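Here the reward is a genuinely functional update applied elementwise over next states: each s' either incurs its own spill cost with its own spill probability, or keeps the value accumulated so far in state_reward[s']. A small check using the spill_cost and spill_prob arrays from the diff; the state_reward vector is hypothetical:

import numpy as np

spill_cost = np.array([-1, -2, -1, -3, -3, -4, -1, -2, 0])
spill_prob = np.array([0.03, 0.05, 0.03, 0.02, 0.04, 0.02, 0.01, 0.05, 0.03])
state_reward = np.full(9, 10.0)   # hypothetical values carried in from the previous round.

# Elementwise: P(spill at s') * cost(s') + (1 - P(spill at s')) * value(s').
updated = spill_prob * spill_cost + (1 - spill_prob) * state_reward
# First cell: 0.03 * (-1) + 0.97 * 10 = 9.67
print(round(float(updated[0]), 2))   # 9.67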
@@ -326,14 +347,29 @@ def is_terminal(self, state):



-coffee = BasicCoffeeRobot()
-opt_val1, opt_policy1 = coffee.value_iteration()
-test_val1 = coffee.policy_iteration(opt_policy1)
-assert(np.array_equal(opt_val1, test_val1))
-coffee2 = CoffeeRobot()
-opt_val2, opt_policy2 = coffee2.value_iteration()
-assert(np.array_equal(opt_val1, opt_val2))
-assert(np.array_equal(opt_policy1, opt_policy2))
-test_val2 = coffee2.policy_iteration(opt_policy2)
-assert(np.array_equal(test_val1, test_val2))
-print("woohoo.")
+def main():
+    # Create basic coffee robot, perform value iteration, and ensure the
+    # values match the policy iteration of that optimal policy.
+    coffee = BasicCoffeeRobot()
+    opt_val1, opt_policy1 = coffee.value_iteration()
+    test_val1 = coffee.policy_iteration(opt_policy1)
+    assert(np.array_equal(opt_val1, test_val1))
+    # Create coffee robot, perform value iteration, and ensure the
+    # values match the policy iteration of that optimal policy.
+    coffee2 = CoffeeRobot()
+    opt_val2, opt_policy2 = coffee2.value_iteration()
+    test_val2 = coffee2.policy_iteration(opt_policy2)
+    assert(np.array_equal(opt_val2, test_val2))
+    # Assert they produce different policies and value outputs, as one would expect.
+    assert(not np.array_equal(opt_val1, opt_val2))
+    assert(not np.array_equal(opt_policy1, opt_policy2))
+    # Display derived policies.
+    print("\nMDP Coffee Robot Policy.")
+    coffee.describe_policy(opt_policy1)
+    print("-"*50) # separator.
+    print("\nFRMDP Coffee Robot Policy.")
+    coffee.describe_policy(opt_policy2)
+
+
+if __name__ == "__main__":
+    main()
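With this change frmdp.py doubles as a small demo: running it builds both robots, cross-checks value iteration against policy iteration, and prints the two derived policies. A hedged sketch of reusing the classes from other code; it assumes frmdp.py is importable from the working directory, and the start state passed to simulate() is only a guess at the expected indexing:

from frmdp import BasicCoffeeRobot, CoffeeRobot   # assumes frmdp.py is on the import path.

robot = CoffeeRobot()
values, policy = robot.value_iteration()
robot.describe_policy(policy)                 # print the policy in readable form.
reward, history = robot.simulate(0, policy)   # start state argument assumed to be a state index.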

project/papers/Not Used/abs-03.pdf

952 KB
Binary file not shown.

project/papers/Not Used/cavusoglu.pdf

213 KB
Binary file not shown.
289 KB
Binary file not shown.
187 KB
Binary file not shown.
1.64 MB
Binary file not shown.
1.23 MB
Binary file not shown.
File renamed without changes.
File renamed without changes.
Binary file not shown.

project/papers/citations.txt

+9
@@ -0,0 +1,9 @@
+1. D. P. Duggan and R. C. Parks, "Principles of Cyberwarfare," in IEEE Security & Privacy, vol. 9, no. , pp. 30-35, 2011. doi:10.1109/MSP.2011.138
+2. D. Elliott, "Deterring Strategic Cyberattack," in IEEE Security & Privacy, vol. 9, no. 5, pp. 36-40, Sept.-Oct. 2011.
+doi: 10.1109/MSP.2011.24
+3. Lye, K. & Wing, J. IJIS (2005) 4: 71. https://doi.org/10.1007/s10207-004-0060-x
+4. Moore, Tyler et al. “Would a 'cyber warrior' protect us: exploring trade-offs between attack and defense of information systems.” NSPW (2010).
+5. Nong Ye, Yebin Zhang and C. M. Borror, "Robustness of the Markov-chain model for cyber-attack detection," in IEEE Transactions on Reliability, vol. 53, no. 1, pp. 116-123, March 2004. doi: 10.1109/TR.2004.823851
+6. S. Roy, C. Ellis, S. Shiva, D. Dasgupta, V. Shandilya and Q. Wu, "A Survey of Game Theory as Applied to Network Security," 2010 43rd Hawaii International Conference on System Sciences, Honolulu, HI, 2010, pp. 1-10. doi: 10.1109/HICSS.2010.35
+7. Spanjaard O., Weng P. (2013) Markov Decision Processes with Functional Rewards. In: Ramanna S., Lingras P., Sombattheera C., Krishna A. (eds) Multi-disciplinary Trends in Artificial Intelligence. MIWAI 2013. Lecture Notes in Computer Science, vol 8271. Springer, Berlin, Heidelberg
+8. T. Bao, Y. Shoshitaishvili, R. Wang, C. Kruegel, G. Vigna and D. Brumley, "How Shall We Play a Game?: A Game-theoretical Model for Cyber-warfare Games," 2017 IEEE 30th Computer Security Foundations Symposium (CSF), Santa Barbara, CA, 2017, pp. 7-21. doi: 10.1109/CSF.2017.34
