diff --git a/canonical_assembly_v2.py b/canonical_assembly_v2.py index 5779b46..6691f9e 100644 --- a/canonical_assembly_v2.py +++ b/canonical_assembly_v2.py @@ -315,7 +315,7 @@ def callback(self, data): self.time_step = len(self.user_sequence) # update remaining parts - self.remaining_objects = [rem_obj for rem_obj in self.remaining_objects if rem_obj not in detected_parts] + self.remaining_objects = [rem_obj for rem_obj in list(self.objects.keys()) if rem_obj not in detected_parts] def deliver_part(self): @@ -492,7 +492,18 @@ def deliver_part(self): else: # ------------------- Move Container back to original place if not boxes------------------- # # wait for user to grab item - time.sleep(3) + if chosen_obj == "tool": + # hold tool box until user picks up tool and put tool back + while("tool" in self.remaining_objects): + time.sleep(0.1) + print("picked up tool") + time.sleep(0.5) + while ("tool" not in self.remaining_objects): + time.sleep(0.5) + print("tool is out") + print("tool put back") + else: + time.sleep(3) if chosen_obj in ["long bolts", "short bolts", "long wire"]: #print("smoothier turn") diff --git a/common.py b/common.py index 120a08b..5f12e10 100644 --- a/common.py +++ b/common.py @@ -73,4 +73,53 @@ def transition(s_from, a): s_to[-2] = a return p, s_to else: - return p, None \ No newline at end of file + return p, None + + +def back_transition(s_to, a): + # preconditions + if s_to[a] > 0: + if a == 0 and s_to[2] < 1: + p = 1.0 + elif a == 1 and s_to[3] < 1: + p = 1.0 + elif a in [2, 3] and s_to[a] > s_to[a + 2]: + p = 1.0 + elif a in [6] and s_to[a + 1] < 1: + p = 1.0 + elif a in [4, 5, 7]: + p = 1.0 + else: + p = 0.0 + else: + p = 0.0 + + # transition to next state + if p == 1.0: + s_from = deepcopy(s_to) + s_from[a] -= 1 + return p, s_from + else: + return p, None + +def canonical_transition(s_from, a): + # preconditions + if s_from[a] < 1: + if a in [0, 1, 2, 5]: + prob = 1.0 + elif a in [3, 4] and s_from[a - 3] == 1: + prob = 1.0 + else: + prob = 0.0 + else: + prob = 0.0 + + # transition to next state + if prob == 1.0: + s_to = deepcopy(s_from) + s_to[a] += 1 + s_to[-1] = s_from[-2] + s_to[-2] = a + return prob, s_to + else: + return prob, None \ No newline at end of file diff --git a/compute_weights.py b/compute_weights.py new file mode 100644 index 0000000..5f19d20 --- /dev/null +++ b/compute_weights.py @@ -0,0 +1,154 @@ +# import python libraries +import os +import pdb +import numpy as np +from copy import deepcopy +import pandas as pd +import pickle + +# import functions +import src.optimizer as O # stochastic gradient descent optimizer +from src.vi import value_iteration +from src.maxent_irl import * +from src.assembly_tasks import * +from src.import_qualtrics import get_qualtrics_survey + +# ----------------------------------------------- Load data ---------------------------------------------------- # + +# download data from qualtrics +learning_survey_id = "SV_8eoX63z06ZhVZRA" +data_path = "/home/icaros/ros_ws/src/ada_manipulation_demos/data/" # os.path.dirname(__file__) + "/data/" +get_qualtrics_survey(dir_save_survey=data_path, survey_id=learning_survey_id) + +# load user data +demo_path = data_path + "Human-Robot Assembly - Learning.csv" +df = pd.read_csv(demo_path) + + +# pre-process feature value +def process_val(x): + if x == "1 (No effort at all)": + x = 1.1 + elif x == "7 (A lot of effort)": + x = 6.9 + else: + x = float(x) + + return x + + +# load user ratings +def load_features(data, user_idx, feature_idx, action_idx): + fea_mat = [] + for j in 
action_idx: + fea_vec = [] + for k in feature_idx: + fea_col = k + str(j) + fea_val = process_val(data[fea_col][user_idx]) + fea_vec.append(fea_val) + fea_mat.append(fea_vec) + return fea_mat + + +# ----------------------------------------------- Optimization -------------------------------------------------- # + +# choose our parameter initialization strategy: +# initialize parameters with constant +init = O.Constant(0.5) + +# choose our optimization strategy: +# we select exponentiated stochastic gradient descent with linear learning-rate decay +optim = O.ExpSga(lr=O.linear_decay(lr0=0.5)) + +# --------------------------------------------- User information ------------------------------------------------ # + +rank_features = False +scale_weights = False + +user_id = input("Enter user id: ") + +print("=======================") +print("Calculating preference for user:", user_id) + +idx = df.index[df['Q1'] == user_id][0] +canonical_survey_actions = [0, 3, 1, 4, 2, 5] +preferred_order = [df[q][idx] for q in ['Q9_1', 'Q9_2', 'Q9_3', 'Q9_4', 'Q9_5', 'Q9_6']] +canonical_demo = [a for _, a in sorted(zip(preferred_order, canonical_survey_actions))] + +# user ratings for features +canonical_q, complex_q = ["Q6_", "Q7_"], ["Q13_", "Q14_"] +canonical_features = load_features(df, idx, canonical_q, [2, 4, 6, 3, 5, 7]) +complex_features = load_features(df, idx, complex_q, [3, 8, 15, 16, 4, 9, 10, 11]) + +# ---------------------------------------- Training: Learn weights ---------------------------------------------- # + +# initialize canonical task +C = CanonicalTask(canonical_features) +C.set_end_state(canonical_demo) +C.enumerate_states() +C.set_terminal_idx() +if rank_features: + C.convert_to_rankings() + +# demonstrations +canonical_user_demo = [canonical_demo] +canonical_trajectories = get_trajectories(C.states, canonical_user_demo, C.transition) + +print("Training ...") + +# using abstract features +abstract_features = np.array([C.get_features(state) for state in C.states]) +norm_abstract_features = abstract_features / np.linalg.norm(abstract_features, axis=0) +canonical_rewards_abstract, canonical_weights_abstract = maxent_irl(C, norm_abstract_features, + canonical_trajectories, + optim, init) + +print("Weights have been learned for the canonical task! 
Fingers X-ed.") +print("Weights -", canonical_weights_abstract) + +# scale weights +if scale_weights: + canonical_weights_abstract /= max(canonical_weights_abstract) + +# ----------------------------------------- Testing: Predict complex -------------------------------------------- # +sample_complex_demo = [1, 3, 5, 0, 2, 2, 2, 2, 4, 4, 4, 4, 6, 6, 6, 6, 7] + +complex_survey_actions = [0, 4, 1, 5, 6, 7, 2, 3] +action_counts = [1, 1, 4, 1, 4, 1, 4, 1] +preferred_order = [df[q][idx] for q in ['Q15_1', 'Q15_2', 'Q15_3', 'Q15_4', 'Q15_5', 'Q15_6', 'Q15_7', 'Q15_8']] +complex_demo = [] +for _, a in sorted(zip(preferred_order, complex_survey_actions)): + complex_demo += [a]*action_counts[a] + +# initialize complex task +X = ComplexTask(complex_features) +X.set_end_state(sample_complex_demo) +X.enumerate_states() +X.set_terminal_idx() +if rank_features: + X.convert_to_rankings() + +# using abstract features +complex_abstract_features = np.array([X.get_features(state) for state in X.states]) +complex_abstract_features /= np.linalg.norm(complex_abstract_features, axis=0) + +# transfer rewards to complex task +transfer_rewards_abstract = complex_abstract_features.dot(canonical_weights_abstract) + +# compute q-values for each state based on learned weights +qf_transfer, _, _ = value_iteration(X.states, X.actions, X.transition, transfer_rewards_abstract, X.terminal_idx) + +# score for predicting the action based on transferred rewards based on abstract features +# predict_sequence, predict_score = predict_trajectory(qf_transfer, X.states, [complex_demo], X.transition, +# sensitivity=0.0, consider_options=False) + +print("canonical : ", canonical_demo) +print("preference: ", complex_demo) + +# save_path = data_path + "learned_models/" +pickle.dump(canonical_weights_abstract, open(data_path + "weights_" + user_id + ".p", "wb")) +pickle.dump(X, open(data_path + "task_" + user_id + ".p", "wb")) +pickle.dump(complex_abstract_features, open(data_path + "features_" + user_id + ".p", "wb")) +pickle.dump(qf_transfer, open(data_path + "q_values_" + user_id + ".p", "wb")) +pickle.dump(X.states, open(data_path + "states_" + user_id + ".p", "wb")) +print("Q-values have been saved for user " + user_id + ".") diff --git a/data/Human-Robot Assembly - Learning.csv b/data/Human-Robot Assembly - Learning.csv new file mode 100644 index 0000000..7287b7c --- /dev/null +++ b/data/Human-Robot Assembly - Learning.csv @@ -0,0 +1,21 @@ +ResponseID,ResponseSet,IPAddress,StartDate,EndDate,RecipientLastName,RecipientFirstName,RecipientEmail,ExternalDataReference,Finished,Status,player_id,survey_id,Q1,Q2,Q2_TEXT,Q3,Q3_TEXT,Q4,Q5,Q6_2,Q6_3,Q6_4,Q6_5,Q6_6,Q6_7,Q7_2,Q7_3,Q7_4,Q7_5,Q7_6,Q7_7,*,Q9_1,Q9_2,Q9_3,Q9_4,Q9_5,Q9_6,Q10_1,Q10_2,Q10_3,Q10_4,Q11,Q12,Q13_3,Q13_4,Q13_8,Q13_9,Q13_10,Q13_11,Q13_15,Q13_16,Q14_3,Q14_4,Q14_8,Q14_9,Q14_10,Q14_11,Q14_15,Q14_16,*,Q15_1,Q15_2,Q15_3,Q15_4,Q15_5,Q15_6,Q15_7,Q15_8,Q16_1,Q16_2,Q16_3,Q16_4,Q17,Q18,LocationLatitude,LocationLongitude,LocationAccuracy +ResponseID,ResponseSet,IPAddress,StartDate,EndDate,RecipientLastName,RecipientFirstName,RecipientEmail,ExternalDataReference,Finished,Status,player_id,survey_id,User ID,Please indicate your Gender,Please indicate your Gender,Please indicate your dominant hand,Please indicate your dominant hand,How would you rate your experience (on a scale of 1-7) in manual assembly tasks?  1 - No experien...,"How would you rate your skill (on a scale 1-7) in manual assembly tasks? 
1 - Not skilled at all,...",How would you rate the physical effort (on a scale of 1-7) required for the actions:-Insert long bolt into large box,How would you rate the physical effort (on a scale of 1-7) required for the actions:-Screw long bolt into large box,How would you rate the physical effort (on a scale of 1-7) required for the actions:-Insert short bolt into small box,How would you rate the physical effort (on a scale of 1-7) required for the actions:-Screw short bolt into small box,How would you rate the physical effort (on a scale of 1-7) required for the actions:-Insert short wire,How would you rate the physical effort (on a scale of 1-7) required for the actions:-Insert long wire,How would you rate the mental effort (on a scale of 1-7) required for the actions:-Insert long bolt into large box,How would you rate the mental effort (on a scale of 1-7) required for the actions:-Screw long bolt into large box,How would you rate the mental effort (on a scale of 1-7) required for the actions:-Insert short bolt into small box,How would you rate the mental effort (on a scale of 1-7) required for the actions:-Screw short bolt into small box,How would you rate the mental effort (on a scale of 1-7) required for the actions:-Insert short wire,How would you rate the mental effort (on a scale of 1-7) required for the actions:-Insert long wire,Answer the following questions based on your final preference for Task A.,Order the actions according to your preference.-Insert long bolt into large box,Order the actions according to your preference.-Screw long bolt,Order the actions according to your preference.-Insert short bolt into small box,Order the actions according to your preference.-Screw short bolt,Order the actions according to your preference.-Insert short wire,Order the actions according to your preference.-Insert long wire,Did you consider the following factors in deciding your preferred sequence of actions?-Physical effort required to perform actions,Did you consider the following factors in deciding your preferred sequence of actions?-Mental effort required to perform actions,Did you consider the following factors in deciding your preferred sequence of actions?-Whether actions required the same tool,Did you consider the following factors in deciding your preferred sequence of actions?-Cost to switch from one action to next,Explain (in 1-3 lines) why you performed actions in the sequence you demonstrated. 
You can also s...,Please list and explain any other factors you took into account for deciding your preferred order...,"Given that, in task A, you rated the physical effort for insert long wire = ...-Insert main wing","Given that, in task A, you rated the physical effort for insert long wire = ...-Screw long bolt to main wing","Given that, in task A, you rated the physical effort for insert long wire = ...-Insert tail wing","Given that, in task A, you rated the physical effort for insert long wire = ...-Screw long bolt to tail wing","Given that, in task A, you rated the physical effort for insert long wire = ...-Screw propeller","Given that, in task A, you rated the physical effort for insert long wire = ...-Screw propeller base with nut","Given that, in task A, you rated the physical effort for insert long wire = ...-Insert long bolt in main wing","Given that, in task A, you rated the physical effort for insert long wire = ...-Insert long bolt in tail wing","Given that, in task A, you rated the mental effort for insert long wire = ...-Insert main wing","Given that, in task A, you rated the mental effort for insert long wire = ...-Screw long bolt to main wing","Given that, in task A, you rated the mental effort for insert long wire = ...-Insert tail wing","Given that, in task A, you rated the mental effort for insert long wire = ...-Screw long bolt to tail wing","Given that, in task A, you rated the mental effort for insert long wire = ...-Screw propeller","Given that, in task A, you rated the mental effort for insert long wire = ...-Screw propeller base with nut","Given that, in task A, you rated the mental effort for insert long wire = ...-Insert long bolt in main wing","Given that, in task A, you rated the mental effort for insert long wire = ...-Insert long bolt in tail wing",Answer the following questions based on your final preference for Task B.,Order the actions according to your preference.-Insert main wing,Order the actions according to your preference.-Screw long bolt to main wing,Order the actions according to your preference.-Insert tail wing,Order the actions according to your preference.-Screw long bolt to tail wing,Order the actions according to your preference.-Screw propeller,Order the actions according to your preference.-Screw propeller base with nut,Order the actions according to your preference.-Insert long bolt in main wing,Order the actions according to your preference.-Insert long bolt in tail wing,Did you consider the following factors in deciding your preferred sequence of actions?-Physical effort required to perform actions,Did you consider the following factors in deciding your preferred sequence of actions?-Mental effort required to perform actions,Did you consider the following factors in deciding your preferred sequence of actions?-Whether actions required the same tool,Did you consider the following factors in deciding your preferred sequence of actions?-Cost to switch from one action to next,Explain (in 1-3 lines) why you performed actions in the sequence you demonstrated. 
You can also s...,Please list and explain any other factors you took into account for deciding your preferred order...,LocationLatitude,LocationLongitude,LocationAccuracy +{'ImportId': 'responseId'},{'ImportId': 'responseSetId'},{'ImportId': 'ipAddress'},{'ImportId': 'startDate'},{'ImportId': 'endDate'},{'ImportId': 'panel-RecipientLastName'},{'ImportId': 'panel-RecipientFirstName'},{'ImportId': 'panel-RecipientEmail'},{'ImportId': 'panel-ExternalDataReference'},{'ImportId': 'finished'},{'ImportId': 'status'},{'ImportId': 'embeddedData-player_id'},{'ImportId': 'embeddedData-survey_id'},{'ImportId': 'QID16-TEXT'},{'ImportId': 'QID18'},{'ImportId': 'QID18-TEXT'},{'ImportId': 'QID58'},{'ImportId': 'QID58-TEXT'},{'ImportId': 'QID20'},{'ImportId': 'QID21'},{'ImportId': 'QID49-2'},{'ImportId': 'QID49-3'},{'ImportId': 'QID49-4'},{'ImportId': 'QID49-5'},{'ImportId': 'QID49-6'},{'ImportId': 'QID49-7'},{'ImportId': 'QID50-2'},{'ImportId': 'QID50-3'},{'ImportId': 'QID50-4'},{'ImportId': 'QID50-5'},{'ImportId': 'QID50-6'},{'ImportId': 'QID50-7'},{'ImportId': 'QID52'},{'ImportId': 'QID62-1'},{'ImportId': 'QID62-2'},{'ImportId': 'QID62-3'},{'ImportId': 'QID62-4'},{'ImportId': 'QID62-5'},{'ImportId': 'QID62-6'},{'ImportId': 'QID54-1'},{'ImportId': 'QID54-2'},{'ImportId': 'QID54-3'},{'ImportId': 'QID54-4'},{'ImportId': 'QID53-TEXT'},{'ImportId': 'QID56-TEXT'},{'ImportId': 'QID41-3'},{'ImportId': 'QID41-4'},{'ImportId': 'QID41-8'},{'ImportId': 'QID41-9'},{'ImportId': 'QID41-10'},{'ImportId': 'QID41-11'},{'ImportId': 'QID41-15'},{'ImportId': 'QID41-16'},{'ImportId': 'QID42-3'},{'ImportId': 'QID42-4'},{'ImportId': 'QID42-8'},{'ImportId': 'QID42-9'},{'ImportId': 'QID42-10'},{'ImportId': 'QID42-11'},{'ImportId': 'QID42-15'},{'ImportId': 'QID42-16'},{'ImportId': 'QID46'},{'ImportId': 'QID63-1'},{'ImportId': 'QID63-2'},{'ImportId': 'QID63-3'},{'ImportId': 'QID63-4'},{'ImportId': 'QID63-5'},{'ImportId': 'QID63-6'},{'ImportId': 'QID63-7'},{'ImportId': 'QID63-8'},{'ImportId': 'QID59-1'},{'ImportId': 'QID59-2'},{'ImportId': 'QID59-3'},{'ImportId': 'QID59-4'},{'ImportId': 'QID39-TEXT'},{'ImportId': 'QID44-TEXT'},{'ImportId': 'Location-LocationLatitude'},{'ImportId': 'Location-LocationLongitude'},{'ImportId': 'Location-LocationAccuracy'} +R_1g1jbSIPWUS6HVC,null,207.151.52.33,2022-01-21 09:25:56,2022-01-21 09:32:52,,,,,1,0,55041,11412,50,1,,2,,6,5,1,3,1,1,2,4,1,1,1,1,2,3,1,3,4,5,6,2,1,7,6,4,1,High effort actions first.,,2,3,1,1,3,1,1,1,2,1,1,1,3,1,1,1,1,1,3,6,8,4,5,2,7,5,6,2,4,High effort actions first.,,34.032196044921875,-118.28359985351562,-1 +R_9tA2beMGY05xXhv,null,207.151.52.33,2022-01-21 11:15:25,2022-01-21 11:20:04,,,,,1,0,62075,70885,49,1,,2,,7,7,1,4,1,2,3,5,2,1,2,1,3,5,1,3,4,1,2,5,6,7,7,2,4,Easiest first.,,3,4,2,2,5,1,1,1,3,1,2,1,5,1,1,1,1,4,6,1,3,7,8,5,2,7,7,2,2,Easiest first.,,34.032196044921875,-118.28359985351562,-1 +R_2VPevIOfs8GK0mu,null,207.151.52.33,2022-01-21 14:55:39,2022-01-21 16:22:50,,,,,1,0,14789,34598,48,1,,2,,4,5,3,5,1,2,3,6,3,4,2,3,5,7,1,1,2,4,5,6,3,2,5,1,6,All actions on one part and then aother. High mental and physical effort later than low on each part.,,5,4,3,6,3,2,2,1,5,2,2,3,6,5,3,3,1,2,4,6,8,1,5,3,7,4,6,1,5,"Higher mental effort first and same part together. 
If I insert tail wing first, then it will be incovient to insert main wing so do main wing first.",,34.032196044921875,-118.28359985351562,-1 +R_3DevNXPwkCiTrlh,null,207.151.52.33,2022-01-24 13:55:16,2022-01-24 14:52:09,,,,,1,0,59680,46940,46,1,,2,,5,5,1,5,1,3,2,4,2,1,1,1,3,5,1,5,6,2,3,1,4,2,2,7,8,Short box because it takes less space. Wire just needs one part so first. Long bolt screwing takes long time.,,2,6,1,4,5,2,1,1,2,1,2,1,4,1,1,1,1,1,6,2,4,7,8,5,3,6,1,7,8,Robot is slow. Do time consuming actions while robot is fetching. Main wing offers support.,,34.032196044921875,-118.28359985351562,-1 +R_3qa3iyALmF5HoxK,null,207.151.52.33,2022-01-24 18:49:32,2022-01-24 19:36:10,,,,,1,0,79990,83855,45,1,,2,,6,6,1,4,1,3,2,2,1,1,1,1,3,3,1,2,3,5,6,4,1,1,1,2,5,Long wire take longest so have robot fetch in the meantime.,Doing thigns in the same order for each box.,5,3,4,3,3,1,1,1,2,1,3,1,2,1,1,1,1,1,4,2,6,7,8,3,5,6,2,2,5,Get out of chair oncce for main wing. While inserting tail wing get tools and fasteners. Main inserted first so screwed first.,Getting up from chair. Completeing sections. Ordering.,34.032196044921875,-118.28359985351562,-1 +R_ptpAR3SA1sLxf6p,null,207.151.52.33,2022-01-18 12:58:10,2022-01-18 13:49:55,,,,,0,0,71133,,50,1,,2,,6,6,3,3,3,3,3,3,4,4,4,4,4,4,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,-1 +R_2xXYCkQB6hYN9UB,null,207.151.52.33,2022-01-25 17:07:43,2022-01-25 17:53:22,,,,,1,0,88656,15302,44,1,,1,,6,5,2,4,1,3,5,6,2,1,2,1,5,7,1,1,2,4,5,6,3,4,4,1,8,"Large box before small box (arbitrary). Foe each box, screws first as easier than wire.",,5,7,2,7,5,3,3,3,1,5,4,4,5,3,2,2,1,6,8,3,5,1,2,7,4,5,5,2,7,Small to big to have more space. Propeller intricate. Screw after inserting so it doesn't fall out. Not switch to a new part.,,34.032196044921875,-118.28359985351562,-1 +R_2DM4q721KDAub1n,null,207.151.52.33,2022-01-21 09:21:20,2022-01-21 11:15:21,,,,,0,0,85412,,50,1,,2,,6,5,1,2,1,1,1,3,1,1,1,1,2,4,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,-1 +R_3CSiBVfUgJtbwNX,null,207.151.52.33,2022-01-28 09:00:53,2022-01-28 12:03:01,,,,,1,0,24559,49036,43,1,,2,,3,3,2,7,2,4,4,6,1,1,1,1,6,7,1,2,3,5,6,4,1,7,7,7,7,Most challenging first. Same part so long bolt next.,number of times of asking robot to retrieve new tools,2,5,2,5,7,3,2,2,3,2,3,2,2,2,2,2,1,3,5,6,8,1,2,4,7,4,4,7,7,From front to tail. Same part.,Front to tail sequence.,34.032196044921875,-118.28359985351562,-1 +R_W2rekPlDFuTsACt,null,68.181.17.195,2022-01-28 13:02:52,2022-01-28 17:30:29,,,,,1,0,90213,90626,42,1,,2,,6,6,2,4,2,4,5,6,2,2,2,2,3,4,1,2,3,5,6,4,1,5,7,5,5,Long wire first as it requires most dexterity. Stick to same part so do long bolt next.,,5,3,5,3,3,3,2,2,4,2,3,2,2,2,2,2,1,6,8,3,5,1,2,7,4,6,5,2,5,"Chose actions that kept the plane body as easy to move around as possible, trying to move with the wing attached is very cumbersome.",Ease of manipulating the working piece,34.032196044921875,-118.28359985351562,-1 +R_2tLIQU08KHV4Nxi,null,207.151.52.33,2022-02-06 10:09:23,2022-02-06 11:54:52,,,,,1,0,85571,26394,1,1,,2,,5,6,3,4,3,3,4,5,3,3,3,3,3,4,1,2,3,5,6,4,1,8,8,8,8,Long wire takes a lot of time so first. Long bolt also takes time and on same part. Wire requires more concentration.,,5,4,4,3,4,5,4,3,4,3,3,3,3,2,3,3,1,2,4,5,7,1,8,3,6,7,7,7,6,Propeller takes least amount of space. Smaller component so can be put to the side. Already have tool. Main wing will allow for balancing. Screw after insert as wing may move. 
Blades are fragile so attach nut at the end.,"Space, balancing, fragility, fixing to ensure stability of parts",34.032196044921875,-118.28359985351562,-1 +R_DogZVELh3OO4qgF,null,207.151.52.33,2022-02-06 12:45:38,2022-02-06 14:59:17,,,,,1,0,76112,75576,2,1,,2,,4,7,1,3,1,1,1,3,1,3,1,1,2,2,1,5,6,3,4,1,2,6,8,7,7,Short and long wire are similar activities. Short wire is practice for long wire. Long wire most mental effort. Success of doing small/short first.,Similarity of actions.,1,4,4,2,3,1,2,1,1,5,1,1,2,1,1,1,1,6,8,3,5,1,2,7,4,7,6,7,8,Propeller small not take much space. Tail wing smaller so before main wing.,Space requirement.,34.032196044921875,-118.28359985351562,-1 +R_26nmkNd29G5fQxp,null,207.151.52.33,2022-02-06 15:36:57,2022-02-06 17:23:49,,,,,1,0,95788,61944,3,1,,2,,1,4,2,3,2,2,3,3,1,1,1,1,1,2,1,1,3,2,4,5,6,5,1,7,7,Same tool so do screwing together. Wire require less physical effort (and some mental effort) so later.,,2,3,2,3,3,2,2,2,2,1,2,1,2,1,1,1,1,2,6,3,7,1,8,4,5,4,1,4,4,"Larger parts later, more space first. Similar actions together. Main wing first to balance. Propeller nit at end to not put weight on the blades.",Space requirement.,34.032196044921875,-118.28359985351562,-1 +R_xzNOxXwOA0w8VZ7,null,207.151.52.33,2022-02-07 13:58:13,2022-02-07 15:21:10,,,,,1,0,87361,78280,11,2,,2,,4,6,1,2,1,2,2,2,1,2,1,1,2,2,1,2,3,1,4,5,6,1,1,8,8,Same tool action consecutively. Keep activity that takes more time (wires) at the end.,Wires may come out while screwing so do screwing first.,2,3,1,2,2,3,1,1,1,2,1,2,2,2,1,1,1,3,8,4,6,1,2,7,5,2,1,8,8,"Screw propeller is separate from the rest of the task, save space. Screw main wing takes more time. Insert and screw bolt at the same time.",,34.032196044921875,-118.28359985351562,-1 +R_1NvsOIzxDKz3R4d,null,207.151.52.33,2022-02-07 15:57:00,2022-02-07 17:13:47,,,,,1,0,17262,81433,12,1,,2,,4,5,1,1,1,1,1,2,1,1,1,1,1,1,1,1,2,3,4,5,6,8,7,8,7,"Wire is not easy to shape, takes more physical effort, so do at the end. No preference in screwing long bolt / short bolt first.",,3,2,1,2,2,1,1,1,1,1,1,1,2,1,1,1,1,2,4,5,7,1,8,3,6,1,2,8,7,Propeller is separate from the plane so can do it and keep it aside. Propeller at end could also be fine. Plane can be stable after inserting main wing. Screw right after insert to not let it fall apart.,,34.032196044921875,-118.28359985351562,-1 +R_2U3O0uKLs0LRhdG,null,207.151.52.33,2022-02-07 18:57:39,2022-02-07 20:12:49,,,,,1,0,42607,70039,13,2,,2,,4,2,1,3,1,2,2,3,1,1,1,1,3,4,1,4,5,3,6,1,2,4,2,8,4,"Wires are a bit time consuming and requires patience, so wires before bolts. Short wire first as it would be easier of the two. Screwing at one to not have to put the tool down.",,3,6,2,2,5,2,1,2,3,2,2,1,2,1,2,2,1,3,5,1,6,7,8,4,2,8,6,8,8,"Inserting tail wing is quick and easy. Bolts could get mixed so insert them first. Attach propeller hub at the end so that plane can be turned over after attaching wings, then do propeller blades right before it.",,34.032196044921875,-118.28359985351562,-1 +R_eaj8MB4rXVHO98J,null,207.151.52.187,2022-05-10 12:59:14,2022-05-10 14:03:18,,,,,1,0,58225,76507,4,1,,2,,4,3,2,3,1,2,2,2,1,1,1,1,1,2,1,5,6,1,2,3,4,2,2,5,2,Short bolt easier than short wire. Long bolt takes most time and concentration (hold the screwdriver straight) so at the end.,Would consider tool similarity in a different task with three-four more boxes and screwing actions. 
Mentally easy to just repeat the training order.,4,4,3,4,4,3,1,1,2,2,2,2,3,2,1,3,1,2,4,5,7,1,8,3,6,7,6,7,6,"Can sit and so screw propeller so first, then stand and do remaining. Tail wing is easier and fragile so at the end. Complete main winf so screw after insert so that it is not loose when doing next tasks. Save task b y doing in sequence, not go from main wing back to propellers.",,34.0322,-118.2836,-1 +R_2q27xUTCoirpR2H,null,207.151.52.187,2022-05-17 11:44:45,2022-05-17 12:50:33,,,,,1,0,71836,81952,5,1,,2,,5,5,1,1,1,1,1,1,1,1,1,1,1,2,1,1,2,4,5,6,3,1,1,1,5,One box at a time. Longer task first. Long bolt takes time. Symmetry so also short bolt before short wire.,,1,2,1,1,3,1,1,1,1,1,1,1,1,1,1,1,1,2,4,5,7,1,8,3,6,1,1,2,5,"cost of actions,aka time, and then it was more towards organization of tasks, to from main wing to tail wing","stabilization, why prop first because its the longer task",34.0322,-118.2836,-1 diff --git a/data/features_1.p b/data/features_1.p new file mode 100644 index 0000000..7574826 Binary files /dev/null and b/data/features_1.p differ diff --git a/data/q_values_1.p b/data/q_values_1.p new file mode 100644 index 0000000..656ed79 Binary files /dev/null and b/data/q_values_1.p differ diff --git a/data/q_values_4.p b/data/q_values_4.p new file mode 100644 index 0000000..e8af539 Binary files /dev/null and b/data/q_values_4.p differ diff --git a/data/q_values_5.p b/data/q_values_5.p new file mode 100644 index 0000000..ed217f2 Binary files /dev/null and b/data/q_values_5.p differ diff --git a/data/states_1.p b/data/states_1.p new file mode 100644 index 0000000..950e5bd Binary files /dev/null and b/data/states_1.p differ diff --git a/data/states_4.p b/data/states_4.p new file mode 100644 index 0000000..950e5bd Binary files /dev/null and b/data/states_4.p differ diff --git a/data/states_5.p b/data/states_5.p new file mode 100644 index 0000000..950e5bd Binary files /dev/null and b/data/states_5.p differ diff --git a/data/task_1.p b/data/task_1.p new file mode 100644 index 0000000..a392a20 Binary files /dev/null and b/data/task_1.p differ diff --git a/data/weights_1.p b/data/weights_1.p new file mode 100644 index 0000000..7ffc094 Binary files /dev/null and b/data/weights_1.p differ diff --git a/data/weights_4.p b/data/weights_4.p new file mode 100644 index 0000000..e0f8386 Binary files /dev/null and b/data/weights_4.p differ diff --git a/data/weights_5.p b/data/weights_5.p new file mode 100644 index 0000000..b2cd45f Binary files /dev/null and b/data/weights_5.p differ diff --git a/proactive_assembly_v2.py b/proactive_assembly_v2.py index b9393cb..eb9151d 100644 --- a/proactive_assembly_v2.py +++ b/proactive_assembly_v2.py @@ -179,7 +179,7 @@ def __init__(self): "tail screw": [container1_4, container1_4Pose, container1GraspPose, container1GraspOffset, container1URDFUri], "propeller blades": [container2_1, container2_1Pose, container2GraspPose, container2GraspOffset, container2URDFUri], "tool": [container2_2, container2_2Pose, container2GraspPose, container2GraspOffset, container2URDFUri], - "main wing": [mainWing, wingURDFUri], + "main wing": [mainWing, wingPose, 0,0, wingURDFUri], "airplane body": []} # ------------------------------------------------ Get robot config ---------------------------------------------- # @@ -613,7 +613,6 @@ def deliver_part(self): self.hand.execute_preshape([0.15, 0.15]) # self.world.remove_skeleton(obj) else: - # hold the grasped object and wait for user to grab one propeller blade if chosen_obj == "tool": # hold tool box until user picks up tool 
and put tool back while("tool" in self.remaining_objects): diff --git a/reactive_assembly_v2.py b/reactive_assembly_v2.py index b96cca9..3a97079 100644 --- a/reactive_assembly_v2.py +++ b/reactive_assembly_v2.py @@ -179,7 +179,7 @@ def __init__(self): "tail screw": [container1_4, container1_4Pose, container1GraspPose, container1GraspOffset, container1URDFUri], "propeller blades": [container2_1, container2_1Pose, container2GraspPose, container2GraspOffset, container2URDFUri], "tool": [container2_2, container2_2Pose, container2GraspPose, container2GraspOffset, container2URDFUri], - "main wing": [mainWing, wingURDFUri], + "main wing": [mainWing, wingPose, 0,0, wingURDFUri], "airplane body": []} # ------------------------------------------------ Get robot config ---------------------------------------------- # diff --git a/src/actual_assembly_action_recognition.py b/src/actual_assembly_action_recognition.py index 7f03993..041dbec 100644 --- a/src/actual_assembly_action_recognition.py +++ b/src/actual_assembly_action_recognition.py @@ -408,7 +408,7 @@ def video_demo(): ground_truth_action_sequence = [1,7,8,2,5,6] - capture = cv2.VideoCapture(0) + capture = cv2.VideoCapture(-1) capture.set(cv2.CAP_PROP_FRAME_WIDTH, 1920) capture.set(cv2.CAP_PROP_FRAME_HEIGHT, 1080) # capture.set(cv2.CAP_PROP_POS_FRAMES,6000) diff --git a/src/assembly_tasks.py b/src/assembly_tasks.py new file mode 100644 index 0000000..b84a98a --- /dev/null +++ b/src/assembly_tasks.py @@ -0,0 +1,391 @@ +import numpy as np +from copy import deepcopy + + +class AssemblyTask: + + def __init__(self, features): + + self.num_actions, self.num_features = np.shape(features) + self.actions = np.array(range(self.num_actions)) + self.features = np.array(features) + + self.min_value, self.max_value = 1.0, 7.0 # rating are on 1-7 Likert scale + + # start state of the assembly task (none of the actions have been performed) + self.s_start = [0] * self.num_actions + [-1, -1] + self.s_end = [] + + self.states = [self.s_start] + self.terminal_idx = [] + + def scale_features(self): + self.features = (np.array(self.features) - self.min_value) / (self.max_value - self.min_value) + + def convert_to_rankings(self): + for feature_idx in range(self.num_features): + feature_values = self.features[:, feature_idx] + sorted_values = [x for x, _, _ in sorted(zip(self.actions, feature_values, self.nominal_features) + , key=lambda y: (y[1], y[2]))] + feature_ranks = np.array(sorted_values).argsort() + 1 + self.features[:, feature_idx] = feature_ranks + + def set_end_state(self, user_demo): + terminal_state = list(np.bincount(user_demo)) + + for curr_a in self.actions: + _, prev_s = self.back_transition(terminal_state, curr_a) + if prev_s: + rem_actions = list(user_demo) + rem_actions.remove(curr_a) + for prev_a in set(rem_actions): + _, s = self.back_transition(prev_s, prev_a) + if s: + self.s_end.append(terminal_state + [curr_a, prev_a]) + + def enumerate_states(self): + prev_states = self.states.copy() + while prev_states: + next_states = [] + for state in prev_states: + for action in self.actions: + p, next_state = self.transition(state, action) + if next_state and (next_state not in next_states) and (next_state not in self.states): + next_states.append(next_state) + + prev_states = next_states + self.states += prev_states + + def enumerate_trajectories(self, demos): + + n_demos, n_steps = np.shape(demos) + all_traj = [[(-1, -1, 0)]] + for t in range(n_steps): + all_traj_new = [] + for traj in all_traj: + prev_state = traj[-1][2] + for action in 
self.actions: + p, next_state = self.transition(self.states[prev_state], action) + if next_state: + new_traj = deepcopy(traj) + new_traj.append((prev_state, action, self.states.index(next_state))) + all_traj_new.append(new_traj) + + all_traj = deepcopy(all_traj_new) + all_traj = np.array(all_traj) + + return all_traj[:, 1:, :] + + def set_terminal_idx(self): + self.terminal_idx = [self.states.index(s_terminal) for s_terminal in self.s_end] + + def get_features(self, state, new_feature=False): + + # calculate current phase + terminal_state = self.s_end[-1] + max_phase = sum(terminal_state[:-2]) + phase = sum(state[:-2]) / max_phase + + curr_a, prev_a = state[-2], state[-1] + + if curr_a >= 0: + e_p, e_m = self.features[curr_a] + else: + e_p, e_m = 0.0, 0.0 + + if prev_a >= 0: + c_part = self.part_similarity[prev_a][curr_a] + c_tool = self.tool_similarity[prev_a][curr_a] + else: + c_part, c_tool = 0.0, 0.0 + + # feature_value = [phase * e_p, phase * e_m, c_part, c_tool] + feature_value = [phase * e_p, phase * e_m, (1.0 - phase) * e_p, (1.0 - phase) * e_m, c_part, c_tool] + if new_feature: + if curr_a == 0 and state[1] == 0: + s_part = 1.0 + elif curr_a == 1 and state[0] == 1: + s_part = 1.0 + else: + s_part = 0.0 + feature_value += [s_part] + feature_value = np.array(feature_value) + + # n_actions, n_features = np.array(action_features).shape + # feature_value = np.zeros(n_features) + # for action, executed in enumerate(state): + # feature_value += executed * np.array(action_features[action]) + + return feature_value + + def prev_states(self, s_to): + previous_states = [] + + curr_a = s_to[-2] + if curr_a >= 0: + s_from = deepcopy(s_to[:-2]) + s_from[curr_a] -= 1 + prev_a = s_to[-1] + + previous_state = deepcopy(s_from) + previous_state.append(prev_a) + if prev_a >= 0: + hist_s = deepcopy(s_from) + hist_s[prev_a] -= 1 + hist_actions = [a for a, s in enumerate(hist_s) if s >= 1] + if hist_actions: + for hist_a in hist_actions: + prob, s = self.back_transition(hist_s, hist_a) + if s: + prev_s = deepcopy(previous_state) + prev_s.append(hist_a) + previous_states.append(prev_s) + else: + hist_a = -1 + previous_state.append(hist_a) + previous_states.append(previous_state) + else: + hist_a = -1 + previous_state.append(hist_a) + previous_states.append(previous_state) + + return previous_states + + +# ----------------------------------------------- Canonical Task ----------------------------------------------------- # + +class CanonicalTask(AssemblyTask): + """ + Actions: + 0 - insert long bolt + 1 - insert short bolt + 2 - insert wire (short) + 3 - screw long bolt + 4 - screw short bolt + 5 - insert wire (long) + + feature values for each action = [physical_effort, mental_effort] + """ + + nominal_features = [[1.2, 1.1], # insert long bolt + [1.1, 1.1], # insert short bolt + [4.0, 6.0], # insert wire (short) + [6.0, 2.0], # screw long bolt + [2.0, 2.0], # screw short bolt + [5.0, 6.9]] # insert wire (long) + + part_similarity = [[1, 0, 0, 1, 0, 1], + [0, 1, 1, 0, 1, 0], + [0, 1, 1, 0, 1, 0], + [1, 0, 0, 1, 0, 1], + [0, 1, 1, 0, 1, 0], + [1, 0, 0, 1, 0, 1]] + + tool_similarity = [[1, 0, 0, 0, 0, 0], + [0, 1, 0, 0, 0, 0], + [0, 0, 1, 0, 0, 0], + [0, 0, 0, 1, 1, 0], + [0, 0, 0, 1, 1, 0], + [0, 0, 0, 0, 0, 1]] + + @staticmethod + def transition(s_from, a): + # preconditions + if s_from[a] < 1: + if a in [0, 1, 2, 5]: + prob = 1.0 + elif a in [3, 4] and s_from[a - 3] == 1: + prob = 1.0 + else: + prob = 0.0 + else: + prob = 0.0 + + # transition to next state + if prob == 1.0: + s_to = deepcopy(s_from) + 
s_to[a] += 1 + s_to[-1] = s_from[-2] + s_to[-2] = a + return prob, s_to + else: + return prob, None + + @staticmethod + def back_transition(s_to, a): + # preconditions + if s_to[a] > 0: + if a in [0, 1] and s_to[a + 3] < 1: + p = 1.0 + elif a in [2, 3, 4, 5]: + p = 1.0 + else: + p = 0.0 + else: + p = 0.0 + + # transition to next state + if p == 1.0: + s_from = deepcopy(s_to) + s_from[a] -= 1 + return p, s_from + else: + return p, None + + +# ------------------------------------------------ Complex Task ----------------------------------------------------- # + +class ComplexTask(AssemblyTask): + """ + Actions: + 0 - insert main wing + 1 - insert tail wing + 2 - insert long bolt into main wing + 3 - insert long bolt into tail wing + 4 - screw long bolt into main wing + 5 - screw long bolt into tail wing + 6 - screw propeller + 7 - screw propeller base + + """ + + nominal_features = [[3.5, 3.5], # insert main wing + [2.0, 3.0], # insert tail wing + [1.2, 1.1], # insert long bolt into main wing + [1.1, 1.1], # insert long bolt into tail wing + [2.1, 2.1], # screw long bolt into main wing + [2.0, 2.0], # screw long bolt into tail wing + [3.5, 6.0], # screw propeller + [2.0, 3.5]] # screw propeller base + + part_similarity = [[1, 0, 1, 0, 1, 0, 0, 0], + [0, 1, 0, 1, 0, 1, 0, 0], + [1, 0, 1, 0, 1, 0, 0, 0], + [0, 1, 0, 1, 0, 1, 0, 0], + [1, 0, 1, 0, 1, 0, 0, 0], + [0, 1, 0, 1, 0, 1, 0, 0], + [0, 0, 0, 0, 0, 0, 1, 1], + [0, 0, 0, 0, 0, 0, 1, 1]] + + tool_similarity = [[1, 0, 0, 0, 0, 0, 0, 0], + [0, 1, 0, 0, 0, 0, 0, 0], + [0, 0, 1, 0, 0, 0, 0, 0], + [0, 0, 0, 1, 0, 0, 0, 0], + [0, 0, 0, 0, 1, 1, 1, 0], + [0, 0, 0, 0, 1, 1, 1, 0], + [0, 0, 0, 0, 1, 1, 1, 0], + [0, 0, 0, 0, 0, 0, 0, 1]] + + @staticmethod + def transition(s_from, a): + # preconditions + if a in [0, 1] and s_from[a] < 1: + p = 1.0 + elif a == 2 and s_from[a] < 4 and s_from[0] == 1: + p = 1.0 + elif a == 3 and s_from[a] < 1 and s_from[1] == 1: + p = 1.0 + elif a == 4 and s_from[a] < 4 and s_from[a] + 1 <= s_from[a - 2]: + p = 1.0 + elif a == 5 and s_from[a] < 1 and s_from[a] + 1 <= s_from[a - 2]: + p = 1.0 + elif a == 6 and s_from[a] < 4: + p = 1.0 + elif a == 7 and s_from[a] < 1 and s_from[a - 1] == 4: + p = 1.0 + else: + p = 0.0 + + # transition to next state + if p == 1.0: + s_to = deepcopy(s_from) + s_to[a] += 1 + s_to[-1] = s_from[-2] + s_to[-2] = a + return p, s_to + else: + return p, None + + @staticmethod + def back_transition(s_to, a): + # preconditions + if s_to[a] > 0: + if a == 0 and s_to[2] < 1: + p = 1.0 + elif a == 1 and s_to[3] < 1: + p = 1.0 + elif a in [2, 3] and s_to[a] > s_to[a + 2]: + p = 1.0 + elif a in [6] and s_to[a + 1] < 1: + p = 1.0 + elif a in [4, 5, 7]: + p = 1.0 + else: + p = 0.0 + else: + p = 0.0 + + # transition to next state + if p == 1.0: + s_from = deepcopy(s_to) + s_from[a] -= 1 + return p, s_from + else: + return p, None + + # ----------------------------------------------- Event sequence ------------------------------------------------ # + # @staticmethod + # def transition(self, s_from, a): + # # preconditions + # if a in [0, 1] and s_from[a] < 1: + # p = 1.0 + # elif a == 2 and s_from[a] < 1 and s_from[0] == 1: + # p = 1.0 + # elif a == 3 and s_from[a] < 1 and s_from[1] == 1: + # p = 1.0 + # elif a == 4 and s_from[a] < 1 and s_from[a] + 1 <= s_from[a - 2]: + # p = 1.0 + # elif a == 5 and s_from[a] < 1 and s_from[a] + 1 <= s_from[a - 2]: + # p = 1.0 + # elif a == 6 and s_from[a] < 1: + # p = 1.0 + # elif a == 7 and s_from[a] < 1 and s_from[a - 1] == 1: + # p = 1.0 + # else: + # p = 0.0 + # + # # 
transition to next state + # if p == 1.0: + # s_to = deepcopy(s_from) + # s_to[a] += 1 + # s_to[-1] = a + # return p, s_to + # else: + # return p, None + # + # @staticmethod + # def back_transition(self, s_to, a): + # # preconditions + # if s_to[a] > 0: + # if a == 0 and s_to[2] < 1: + # p = 1.0 + # elif a == 1 and s_to[3] < 1: + # p = 1.0 + # elif a in [2, 3] and s_to[a] > s_to[a + 2]: + # p = 1.0 + # elif a in [6] and s_to[a + 1] < 1: + # p = 1.0 + # elif a in [4, 5, 7]: + # p = 1.0 + # else: + # p = 0.0 + # else: + # p = 0.0 + # + # # transition to next state + # if p == 1.0: + # s_from = deepcopy(s_to) + # s_from[a] -= 1 + # return p, s_from + # else: + # return p, None diff --git a/src/canonical_assembly_action_recognition.py b/src/canonical_assembly_action_recognition.py index d39c6c9..8facc98 100644 --- a/src/canonical_assembly_action_recognition.py +++ b/src/canonical_assembly_action_recognition.py @@ -78,7 +78,12 @@ txt_dir = "./" -tool_detected = False +# global variables for detecting tool +detect_tool = False +detect_empty_tool = False +last_empty_tool = False +tool_off_counter = 0.0 +starttime_tool = time.time() class Action: @@ -107,7 +112,7 @@ def has_parts(self, part_set): # Action([5], 'Insert long wire', [[25, 30]]), # ] -parts_list = {"18": "tool", +parts_list = {"17": "tool", "21": "long bolts", "22": "short bolts", "24": "short wire", @@ -118,9 +123,9 @@ def has_parts(self, part_set): # TODO: add flipped tags here actions_list = [Action([0], 'Insert long bolt', [[25, 21]]), - Action([3], 'Screw long bolt', [[18, 25, 21, 16]]), + Action([3], 'Screw long bolt', [[17, 25, 21, 16]]), Action([1], 'Insert short bolt', [[26, 22]]), - Action([4], 'Screw short bolt', [[18, 26, 22, 15]]), + Action([4], 'Screw short bolt', [[17, 26, 22, 15]]), Action([2], 'Insert short wire', [[26, 24]]), Action([5], 'Insert long wire', [[25, 30]]), ] @@ -138,7 +143,8 @@ def has_parts(self, part_set): def detect_apriltag(gray, image, state): - global performed_actions, part_set, action_sequence, tool_detected + global performed_actions, part_set, action_sequence + global detect_tool, detect_empty_tool, last_empty_tool, tool_off_counter, starttime_tool ifRecord = False @@ -149,19 +155,33 @@ def detect_apriltag(gray, image, state): # results = detector.detect(img=gray,True, camera_params=[544.021136,542.307110,308.111905,261.603373], tag_size=0.044) results = detector.detect(img=gray) - if len(results) > 0: - # print("[INFO] {} total AprilTags detected".format(len(results))) - useless = 0 - else: - # print("No AprilTag Detected") - return image, ifRecord + # if len(results) > 0: + # # print("[INFO] {} total AprilTags detected".format(len(results))) + # useless = 0 + # else: + # # print("No AprilTag Detected") + # return image, ifRecord + + detect_tool = False + detect_empty_tool = False # loop over the AprilTag detection results for r in results: # AprilTag state - if (r.tag_id == 18 or r.tag_id == 17) and tool_detected==False: - tool_detected = True - if r.tag_id > 32: + if r.tag_id == 18: + # tool box + # detect_tool = True + if not 18 in part_set: + part_set.add(18) + + elif r.tag_id == 17: + # tool empty tag + detect_tool = True + detect_empty_tool = True + if not 17 in part_set: + part_set.add(17) + + elif r.tag_id > 32: print("tag id:",r.tag_id) continue @@ -207,6 +227,20 @@ def detect_apriltag(gray, image, state): # print("[INFO] dist:",dist," tag pose:",t) + # tool + if detect_empty_tool == False and last_empty_tool == True: + starttime_tool = time.time() + if detect_empty_tool == False and 
last_empty_tool == False: + tool_off_counter = time.time() - starttime_tool + if detect_empty_tool == False and tool_off_counter >= 3.0: + if 17 in part_set: + part_set.remove(17) + if detect_tool == False and tool_off_counter >= 3.0: + if 18 in part_set: + part_set.remove(18) + + last_empty_tool = detect_empty_tool + return image, ifRecord @@ -220,7 +254,7 @@ def video_demo(): ground_truth_action_sequence = [1,7,8,2,5,6] - capture = cv2.VideoCapture(-2) + capture = cv2.VideoCapture(-1) capture.set(cv2.CAP_PROP_FRAME_WIDTH, 1280) capture.set(cv2.CAP_PROP_FRAME_HEIGHT, 720) # capture.set(cv2.CAP_PROP_POS_FRAMES,6000) diff --git a/src/import_qualtrics.py b/src/import_qualtrics.py new file mode 100644 index 0000000..ab0d044 --- /dev/null +++ b/src/import_qualtrics.py @@ -0,0 +1,49 @@ +import requests +import zipfile +import io + +def get_qualtrics_survey(dir_save_survey, survey_id): + """ automatically query the qualtrics survey data + guide https://community.alteryx.com/t5/Alteryx-Designer-Discussions/Python-Tool-Downloading-Qualtrics-Survey-Data-using-Python-API/td-p/304898 """ + + # Setting user Parameters + api_token = "S4D7SsvQvFco9hWlyAUyZonJeS5NNotMV3emkTMq" + file_format = "csv" + data_center = 'usc.ca1' # "." + + # Setting static parameters + request_check_progress = 0 + progress_status = "in progress" + base_url = "https://{0}.qualtrics.com/API/v3/responseexports/".format(data_center) + headers = { + "content-type": "application/json", + "x-api-token": api_token, + } + + # Step 1: Creating Data Export + download_request_url = base_url + download_request_payload = '{"format":"' + file_format + '","surveyId":"' + survey_id + '"}' # you can set useLabels:True to get responses in text format + download_request_response = requests.request("POST", download_request_url, data=download_request_payload, headers=headers) + progress_id = download_request_response.json()["result"]["id"] + # print(download_request_response.text) + + # Step 2: Checking on Data Export Progress and waiting until export is ready + while request_check_progress < 100 and progress_status != "complete": + request_check_url = base_url + progress_id + request_check_response = requests.request("GET", request_check_url, headers=headers) + request_check_progress = request_check_response.json()["result"]["percentComplete"] + + # Step 3: Downloading file + request_download_url = base_url + progress_id + '/file' + request_download = requests.request("GET", request_download_url, headers=headers, stream=True) + + # Step 4: Unzipping the file + zipfile.ZipFile(io.BytesIO(request_download.content)).extractall(dir_save_survey) + print('Downloaded qualtrics survey') + +if __name__ == "__main__": + + path = "" + learning_survey_id = "SV_8eoX63z06ZhVZRA" + + get_qualtrics_survey(dir_save_survey = path, survey_id = learning_survey_id) \ No newline at end of file diff --git a/src/maxent_irl.py b/src/maxent_irl.py new file mode 100644 index 0000000..ccdde08 --- /dev/null +++ b/src/maxent_irl.py @@ -0,0 +1,401 @@ +import numpy as np +from src.vi import value_iteration +from copy import deepcopy +from src.assembly_tasks import * + +# ------------------------------------------------ IRL functions ---------------------------------------------------- # + +def get_trajectories(states, demonstrations, transition_function): + trajectories = [] + for demo in demonstrations: + s = states[0] + trajectory = [] + for action in demo: + p, sp = transition_function(s, action) + s_idx, sp_idx = states.index(s), states.index(sp) + trajectory.append((s_idx, 
action, sp_idx)) + s = sp + trajectories.append(trajectory) + + return trajectories + + +def feature_expectation_from_trajectories(s_features, trajectories): + n_states, n_features = s_features.shape + + fe = np.zeros(n_features) + for t in trajectories: # for each trajectory + for s_idx, a, sp_idx in t: # for each state in trajectory + + fe += s_features[sp_idx] # sum-up features + + return fe / len(trajectories) # average over trajectories + + +def initial_probabilities_from_trajectories(states, trajectories): + n_states = len(states) + prob = np.zeros(n_states) + + for t in trajectories: # for each trajectory + prob[t[0][0]] += 1.0 # increment starting state + + return prob / len(trajectories) # normalize + + +def compute_expected_svf(task, p_initial, reward, max_iters, eps=1e-5): + + states, actions, terminal = task.states, task.actions, task.terminal_idx + n_states, n_actions = len(states), len(actions) + + # Backward Pass + # 1. initialize at terminal states + zs = np.zeros(n_states) # zs: state partition function + zs[terminal] = 1.0 + + # 2. perform backward pass + for i in range(2*n_states): + za = np.zeros((n_states, n_actions)) # za: action partition function + for s_idx in range(n_states): + for a in actions: + prob, sp = task.transition(states[s_idx], a) + if sp: + sp_idx = task.states.index(sp) + if zs[sp_idx] > 0.0: + za[s_idx, a] += np.exp(reward[s_idx]) * zs[sp_idx] + + zs = za.sum(axis=1) + zs[terminal] = 1.0 + + # 3. compute local action probabilities + p_action = za / zs[:, None] + + # Forward Pass + # 4. initialize with starting probability + d = np.zeros((n_states, max_iters)) # d: state-visitation frequencies + d[:, 0] = p_initial + + # 5. iterate for N steps + for t in range(1, max_iters): # longest trajectory: n_states + for sp_idx in range(n_states): + parents = task.prev_states(states[sp_idx]) + if parents: + for s in parents: + s_idx = states.index(s) + a = states[sp_idx][-1] + d[sp_idx, t] += d[s_idx, t - 1] * p_action[s_idx, a] + + # 6. 
sum-up frequencies + return d.sum(axis=1) + + +def compute_expected_svf_using_rollouts(task, reward, max_iters): + states, actions, terminal = task.states, task.actions, task.terminal_idx + n_states, n_actions = len(states), len(actions) + + qf, vf, _ = value_iteration(states, actions, task.transition, reward, terminal) + svf = np.zeros(n_states) + for _ in range(n_states): + s_idx = 0 + svf[s_idx] += 1 + while s_idx not in task.terminal_idx: + max_action_val = -np.inf + candidates = [] + for a in task.actions: + p, sp = task.transition(states[s_idx], a) + if sp: + if qf[s_idx][a] > max_action_val: + candidates = [a] + max_action_val = qf[s_idx][a] + elif qf[s_idx][a] == max_action_val: + candidates.append(a) + + if not candidates: + print("Error: No candidate actions from state", s_idx) + + take_action = np.random.choice(candidates) + p, sp = task.transition(states[s_idx], take_action) + s_idx = states.index(sp) + svf[s_idx] += 1 + + e_svf = svf/n_states + + return e_svf + + +def maxent_irl(task, s_features, trajectories, optim, init, eps=1e-3): + + # states, actions = task.states, task.actions + + # number of actions and features + n_states, n_features = s_features.shape + + # length of each demonstration + _, demo_length, _ = np.shape(trajectories) + + # compute feature expectation from trajectories + e_features = feature_expectation_from_trajectories(s_features, trajectories) + + # compute starting-state probabilities from trajectories + p_initial = initial_probabilities_from_trajectories(task.states, trajectories) + + # gradient descent optimization + omega = init(n_features) # initialize our parameters + delta = np.inf # initialize delta for convergence check + + optim.reset(omega) # re-start optimizer + while delta > eps: # iterate until convergence + omega_old = omega.copy() + + # compute per-state reward from features + reward = s_features.dot(omega) + + # compute gradient of the log-likelihood + e_svf = compute_expected_svf_using_rollouts(task, reward, demo_length) + grad = e_features - s_features.T.dot(e_svf) + + # perform optimization step and compute delta for convergence + optim.step(grad) + + # re-compute delta for convergence check + delta = np.max(np.abs(omega_old - omega)) + # print(delta) + + # re-compute per-state reward and return + return s_features.dot(omega), omega + + +# ----------------------------------------- Bayesian inference functions -------------------------------------------- # + +def boltzman_likelihood(state_features, trajectories, weights, rationality=0.99): + n_states, n_features = np.shape(state_features) + likelihood, rewards = [], [] + for traj in trajectories: + feature_count = deepcopy(state_features[traj[0][0]]) + for t in traj: + feature_count += deepcopy(state_features[t[2]]) + total_reward = rationality * weights.dot(feature_count) + rewards.append(total_reward) + likelihood.append(np.exp(total_reward)) + + return likelihood, rewards + + +def get_feature_count(state_features, trajectories): + feature_counts = [] + for traj in trajectories: + feature_count = deepcopy(state_features[traj[0][0]]) + for t in traj: + feature_count += deepcopy(state_features[t[2]]) + feature_counts.append(feature_count) + + return feature_counts + + +# ------------------------------------------------ MDP functions ---------------------------------------------------- # + +def random_trajectory(states, demos, transition_function): + """ + random predicted trajectory + """ + + demo = demos[0] + s, available_actions = 0, demo.copy() + + generated_sequence, score = [], 
[] + for take_action in demo: + candidates = [] + for a in available_actions: + p, sp = transition_function(states[s], a) + if sp: + candidates.append(a) + + if not candidates: + print(s) + + options = list(set(candidates)) + predict_action = np.random.choice(options) + if take_action in options: + acc = 1/len(options) + else: + acc = 0.0 + score.append(acc) + + generated_sequence.append(take_action) + p, sp = transition_function(states[s], take_action) + s = states.index(sp) + available_actions.remove(take_action) + + return score, generated_sequence + + +def rollout_trajectory(qf, states, transition_function, remaining_actions, start_state=0): + + s = start_state + available_actions = deepcopy(remaining_actions) + generated_sequence = [] + while len(available_actions) > 0: + print(s, available_actions) + max_action_val = -np.inf + candidates = [] + for a in available_actions: + p, sp = transition_function(states[s], a) + if sp: + if qf[s][a] > max_action_val: + candidates = [a] + max_action_val = qf[s][a] + elif qf[s][a] == max_action_val: + candidates.append(a) + max_action_val = qf[s][a] + + if not candidates: + print(s) + take_action = np.random.choice(candidates) + generated_sequence.append(take_action) + p, sp = transition_function(states[s], take_action) + s = states.index(sp) + available_actions.remove(take_action) + + + return generated_sequence + + +def predict_trajectory(qf, states, demos, transition_function, sensitivity=0, consider_options=False): + + # assume the same starting state and available actions for all users + demo = demos[0] # TODO: for demo in demos: + s, available_actions = 0, demo.copy() + + scores, predictions, options = [], [], [] + for take_action in demo: + + max_action_val = -np.inf + candidates, applicants = [], [] + for a in available_actions: + p, sp = transition_function(states[s], a) + if sp: + applicants.append(a) + if qf[s][a] > (1+sensitivity)*max_action_val: + candidates = [a] + max_action_val = qf[s][a] + elif (1-sensitivity)*max_action_val <= qf[s][a] <= (1+sensitivity)*max_action_val: + candidates.append(a) + max_action_val = qf[s][a] + + candidates = list(set(candidates)) + applicants = list(set(applicants)) + + predictions.append(candidates) + options.append(applicants) + + if consider_options and (len(candidates) < len(applicants)): + score = [take_action in candidates] + else: + score = [] + for predict_action in candidates: + score.append(predict_action == take_action) + scores.append(np.mean(score)) + + p, sp = transition_function(states[s], take_action) + s = states.index(sp) + available_actions.remove(take_action) + + return scores, predictions, options + + +# ------------------------------------------------- Contribution ---------------------------------------------------- # + +def online_predict_trajectory(task, demos, task_trajectories, weights, features, samples, priors, + sensitivity=0, consider_options=False): + + # assume the same starting state and available actions for all users + demo = demos[0] + s, available_actions = 0, demo.copy() + transition_function = task.transition + states = task.states + + scores, predictions, options = [], [], [] + for step, take_action in enumerate(demo): + + # compute policy for current estimate of weights + rewards = features.dot(weights) + qf, _, _ = value_iteration(task.states, task.actions, task.transition, rewards, task.terminal_idx) + + # anticipate user action in current state + max_action_val = -np.inf + candidates, applicants = [], [] + for a in available_actions: + p, sp = 
transition_function(states[s], a) + if sp: + applicants.append(a) + if qf[s][a] > (1 + sensitivity) * max_action_val: + candidates = [a] + max_action_val = qf[s][a] + elif (1 - sensitivity) * max_action_val <= qf[s][a] <= (1 + sensitivity) * max_action_val: + candidates.append(a) + max_action_val = qf[s][a] + + candidates = list(set(candidates)) + applicants = list(set(applicants)) + predictions.append(candidates) + options.append(applicants) + + # calculate accuracy of prediction + if consider_options and (len(candidates) < len(applicants)): + score = [take_action in candidates] + else: + score = [] + for predict_action in candidates: + score.append(predict_action == take_action) + scores.append(np.mean(score)) + + # update weights based on correct user action + future_actions = deepcopy(available_actions) + if np.mean(score) < 1.0: + + # infer intended user action + prev_weights = deepcopy(weights) + p, sp = transition_function(states[s], take_action) + future_actions.remove(take_action) + ro = rollout_trajectory(qf, states, transition_function, future_actions, states.index(sp)) + future_actions.append(take_action) + complex_user_demo = [demo[:step] + [take_action] + ro] + complex_trajectories = get_trajectories(states, complex_user_demo, transition_function) + + # compute set from which user picks the intended action + # all_complex_trajectories = [traj for traj in task_trajectories if all(traj[:step, 1] == demo[:step])] + # all_intended_trajectories = [traj for traj in task_trajectories if all(traj[:step+1, 1] == demo[:step+1])] + + # likelihood_intention = boltzman_likelihood(features, all_intended_trajectories, prev_weights) + # intention_idx = likelihood_intention.index(max(likelihood_intention)) + # intended_trajectory = all_intended_trajectories[intention_idx] + + # update weights + n_samples = 100 + weight_priors = np.ones(len(samples))/len(samples) + posterior = [] + for n_sample in range(n_samples): + weight_idx = np.random.choice(range(len(samples)), size=1, p=weight_priors)[0] + complex_weights = samples[weight_idx] + likelihood_all_traj, _ = boltzman_likelihood(features, task_trajectories, complex_weights) + likelihood_user_demo, r = boltzman_likelihood(features, complex_trajectories, complex_weights) + likelihood_user_demo = likelihood_user_demo / np.sum(likelihood_all_traj) + bayesian_update = (likelihood_user_demo[0] * weight_priors[n_sample]) + + # new_samples.append(complex_weights) + posterior.append(bayesian_update) + + posterior = list(posterior / np.sum(posterior)) + max_posterior = max(posterior) + + weights = samples[posterior.index(max_posterior)] + # samples = deepcopy(new_samples) + # priors = deepcopy(posterior) + + print("Updated weights from", prev_weights, "to", weights) + + # priors = priors / np.sum(priors) + p, sp = transition_function(states[s], take_action) + s = states.index(sp) + available_actions.remove(take_action) + + return scores, predictions, options diff --git a/src/optimizer.py b/src/optimizer.py new file mode 100644 index 0000000..5d4dfff --- /dev/null +++ b/src/optimizer.py @@ -0,0 +1,398 @@ +""" +Generic stochastic gradient-ascent based optimizers. + +Due to the MaxEnt IRL objective of maximizing the log-likelihood instead of +minimizing a loss function, all optimizers in this module are actually +stochastic gradient-ascent based instead of the more typical descent. +""" + +import numpy as np + + +class Optimizer: + """ + Optimizer base-class. + + Note: + Before use of any optimizer, its `reset` function must be called. 
+ + Attributes: + parameters: The parameters to be optimized. This should only be set + via the `reset` method of this optimizer. + """ + def __init__(self): + self.parameters = None + + def reset(self, parameters): + """ + Reset this optimizer. + + Args: + parameters: The parameters to optimize. + """ + self.parameters = parameters + + def step(self, grad, *args, **kwargs): + """ + Perform a single optimization step. + + Args: + grad: The gradient used for the optimization step. + + Other arguments are optimizer-specific. + """ + raise NotImplementedError + + def normalize_grad(self, ord=None): + """ + Create a new wrapper for this optimizer which normalizes the + gradient before each step. + + Returns: + An Optimizer instance wrapping this Optimizer, normalizing the + gradient before each step. + + See also: + `class NormalizeGrad` + """ + return NormalizeGrad(self, ord) + + +class Sga(Optimizer): + """ + Basic stochastic gradient ascent. + + Note: + Before use of any optimizer, its `reset` function must be called. + + Args: + lr: The learning-rate. This may either be a float for a constant + learning-rate or a function + `(k: Integer) -> learning_rate: Float` + taking the step number as parameter and returning a learning + rate as result. + See also `linear_decay`, `power_decay` and `exponential_decay`. + + Attributes: + parameters: The parameters to be optimized. This should only be set + via the `reset` method of this optimizer. + lr: The learning-rate as specified in the __init__ function. + k: The number of steps run since the last reset. + """ + def __init__(self, lr): + super().__init__() + self.lr = lr + self.k = 0 + + def reset(self, parameters): + """ + Reset this optimizer. + + Args: + parameters: The parameters to optimize. + """ + super().reset(parameters) + self.k = 0 + + def step(self, grad, *args, **kwargs): + """ + Perform a single optimization step. + + Args: + grad: The gradient used for the optimization step. + """ + lr = self.lr if not callable(self.lr) else self.lr(self.k) + self.k += 1 + + self.parameters += lr * grad + + +class ExpSga(Optimizer): + """ + Exponentiated stochastic gradient ascent. + + The implementation follows Algorithm 10.5 from B. Ziebart's thesis + (2010) and is slightly adapted from the original algorithm provided by + Kivinen and Warmuth (1997). + + Note: + Before use of any optimizer, its `reset` function must be called. + + Args: + lr: The learning-rate. This may either be a float for a constant + learning-rate or a function + `(k: Integer) -> learning_rate: Float` + taking the step number as parameter and returning a learning + rate as result. + See also `linear_decay`, `power_decay` and `exponential_decay`. + normalize: A boolean specifying if the the parameters should be + normalized after each step, as done in the original algorithm by + Kivinen and Warmuth (1997). + + Attributes: + parameters: The parameters to be optimized. This should only be set + via the `reset` method of this optimizer. + lr: The learning-rate as specified in the __init__ function. + k: The number of steps run since the last reset. + """ + def __init__(self, lr, normalize=False): + super().__init__() + self.lr = lr + self.normalize = normalize + self.k = 0 + + def reset(self, parameters): + """ + Reset this optimizer. + + Args: + parameters: The parameters to optimize. + """ + super().reset(parameters) + self.k = 0 + + def step(self, grad, *args, **kwargs): + """ + Perform a single optimization step. + + Args: + grad: The gradient used for the optimization step. 
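+
+        Note:
+            The update below is multiplicative, `parameters <- parameters * exp(lr * grad)`,
+            so parameters that start positive remain positive; with `normalize=True` they are
+            additionally rescaled to sum to one after each step.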
+ """ + lr = self.lr if not callable(self.lr) else self.lr(self.k) + self.k += 1 + + self.parameters *= np.exp(lr * grad) + + if self.normalize: + self.parameters /= self.parameters.sum() + + +class NormalizeGrad(Optimizer): + """ + A wrapper wrapping another Optimizer, normalizing the gradient before + each step. + + For every call to `step`, this Optimizer will normalize the gradient and + then pass the normalized gradient on to the underlying optimizer + specified in the constructor. + + Note: + Before use of any optimizer, its `reset` function must be called. + + Args: + opt: The underlying optimizer to be used. + ord: The order of the norm to be used for normalizing. This argument + will be direclty passed to `numpy.linalg.norm`. + """ + def __init__(self, opt, ord=None): + super().__init__() + self.opt = opt + self.ord = ord + + def reset(self, parameters): + """ + Reset this optimizer. + + Args: + parameters: The parameters to optimize. + """ + super().reset(parameters) + self.opt.reset(parameters) + + def step(self, grad, *args, **kwargs): + """ + Perform a single optimization step. + + This will call the underlying optimizer with the normalized + gradient. + + Args: + grad: The gradient used for the optimization step. + + Other arguments depend on the underlying optimizer. + """ + return self.opt.step(grad / np.linalg.norm(grad, self.ord), *args, **kwargs) + + +def linear_decay(lr0=0.2, decay_rate=1.0, decay_steps=1): + """ + Linear learning-rate decay. + + Creates a function `(k: Integer) -> learning_rate: Float` returning the + learning-rate in dependence on the current number of iterations. The + returned function can be expressed as + + learning_rate(k) = lr0 / (1.0 + decay_rate * floor(k / decay_steps)) + + Args: + lr0: The initial learning-rate. + decay_rate: The decay factor. + decay_steps: An integer number of steps that can be used to + staircase the learning-rate. + + Returns: + The function giving the current learning-rate in dependence of the + current iteration as specified above. + """ + def _lr(k): + return lr0 / (1.0 + decay_rate * np.floor(k / decay_steps)) + + return _lr + + +def power_decay(lr0=0.2, decay_rate=1.0, decay_steps=1, power=2): + """ + Power-based learning-rate decay. + + Creates a function `(k: Integer) -> learning_rate: Float` returning the + learning-rate in dependence on the current number of iterations. The + returned function can be expressed as + + learning_rate(k) = lr0 / (1.0 + decay_rate * floor(k / decay_steps))^power + + Args: + lr0: The initial learning-rate. + decay_rate: The decay factor. + decay_steps: An integer number of steps that can be used to + staircase the learning-rate. + power: The exponent to use for decay. + + Returns: + The function giving the current learning-rate in dependence of the + current iteration as specified above. + """ + def _lr(k): + return lr0 / (decay_rate * np.floor(k / decay_steps) + 1.0)**power + + return _lr + + +def exponential_decay(lr0=0.2, decay_rate=0.5, decay_steps=1): + """ + Exponential learning-rate decay. + + Creates a function `(k: Integer) -> learning_rate: Float` returning the + learning-rate in dependence on the current number of iterations. The + returned function can be expressed as + + learning_rate(k) = lr0 * e^(-decay_rate * floor(k / decay_steps)) + + Args: + lr0: The initial learning-rate. + decay_rate: The decay factor. + decay_steps: An integer number of steps that can be used to + staircase the learning-rate. 
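+
+        Example (illustrative, with the defaults lr0=0.2, decay_rate=0.5, decay_steps=1):
+            learning_rate(0) = 0.2, learning_rate(1) ≈ 0.121, learning_rate(2) ≈ 0.074.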
+ + Returns: + The function giving the current learning-rate in dependence of the + current iteration as specified above. + """ + def _lr(k): + return lr0 * np.exp(-decay_rate * np.floor(k / decay_steps)) + + return _lr + + +class Initializer: + """ + Base-class for an Initializer, specifying a strategy for parameter + initialization. + """ + def __init__(self): + pass + + def initialize(self, shape): + """ + Create an initial set of parameters. + + Args: + shape: The shape of the parameters. + + Returns: + An initial set of parameters of the given shape, adhering to the + initialization-strategy described by this Initializer. + """ + raise NotImplementedError + + def __call__(self, shape): + """ + Create an initial set of parameters. + + Note: + This function simply calls `self.initialize(shape)`. + + Args: + shape: The shape of the parameters. + + Returns: + An initial set of parameters of the given shape, adhering to the + initialization-strategy described by this Initializer. + """ + return self.initialize(shape) + + +class Uniform(Initializer): + """ + An Initializer, initializing parameters according to a specified uniform + distribution. + + Args: + low: The minimum value of the distribution. + high: The maximum value of the distribution + + Attributes: + low: The minimum value of the distribution. + high: The maximum value of the distribution + """ + def __init__(self, low=0.0, high=1.0): + super().__init__() + self.low = low + self.high = high + + def initialize(self, shape): + """ + Create an initial set of uniformly random distributed parameters. + + The parameters of the distribution can be specified in the + constructor. + + Args: + shape: The shape of the parameters. + + Returns: + An set of initial uniformly distributed parameters of the given + shape. + """ + return np.random.uniform(size=shape, low=self.low, high=self.high) + + +class Constant(Initializer): + """ + An Initializer, initializing parameters to a constant value. + + Args: + value: Either a scalar value or a function in dependence on the + shape of the parameters, returning a scalar value for + initialization. + """ + def __init__(self, value=1.0): + super().__init__() + self.value = value + + def initialize(self, shape): + """ + Create set of parameters with initial fixed value. + + The scalar value used for initialization can be specified in the + constructor. + + Args: + shape: The shape of the parameters. + + Returns: + An set of constant-valued parameters of the given shape. 
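+
+        Example (illustrative):
+            Constant(0.5)((3,)) returns array([0.5, 0.5, 0.5]);
+            Constant(lambda shape: 1.0 / np.prod(shape))((4,)) returns array([0.25, 0.25, 0.25, 0.25]).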
+ """ + if callable(self.value): + return np.ones(shape) * self.value(shape) + else: + return np.ones(shape) * self.value diff --git a/vi.py b/src/vi.py similarity index 98% rename from vi.py rename to src/vi.py index a28df1f..551255f 100644 --- a/vi.py +++ b/src/vi.py @@ -11,7 +11,9 @@ def value_iteration(states, actions, transition, rewards, terminal_states, delta terminal_states: index of terminal states rewards: list of rewards for each state delta: error threshold + Returns: q-value for each state-action pair, value for each state, optimal actions in each state + """ vf = {s: 0 for s in range(len(states))} # values op_actions = {s: 0 for s in range(len(states))} # optimal actions @@ -66,4 +68,4 @@ def value_iteration(states, actions, transition, rewards, terminal_states, delta if change >= delta: print("VI did not converge after %d iterations (delta=%.2f)" % (i, change)) - return qf, vf, op_actions \ No newline at end of file + return qf, vf, op_actions diff --git a/test_proactive_assembly_v2_with_weights.py b/test_proactive_assembly_v2_with_weights.py index a214721..ddb2d07 100644 --- a/test_proactive_assembly_v2_with_weights.py +++ b/test_proactive_assembly_v2_with_weights.py @@ -19,8 +19,11 @@ import common from collections import OrderedDict -from vi import value_iteration -from irl import get_trajectories, rollout_trajectory, boltzman_likelihood +from src.vi import value_iteration +import src.optimizer as O # stochastic gradient descent optimizer +from src.maxent_irl import * +from src.assembly_tasks import * +from src.import_qualtrics import get_qualtrics_survey # set to False if operating real robot @@ -180,7 +183,7 @@ def __init__(self): "tail screw": [container1_4, container1_4Pose, container1GraspPose, container1GraspOffset, container1URDFUri], "propeller blades": [container2_1, container2_1Pose, container2GraspPose, container2GraspOffset, container2URDFUri], "tool": [container2_2, container2_2Pose, container2GraspPose, container2GraspOffset, container2URDFUri], - "main wing": [mainWing, wingURDFUri], + "main wing": [mainWing, wingPose, 0,0, wingURDFUri], "airplane body": []} # ------------------------------------------------ Get robot config ---------------------------------------------- # @@ -214,10 +217,9 @@ def __init__(self): ### # load the variables for computing weights - #self.weights = np.array(pickle.load(open(directory_syspath + "/data/weights_" + user_id + ".p", "rb"))) - #self.terminal_states = pickle.load(open(directory_syspath + "/data/terminal_states.p", "rb")) - #self.features = np.array(pickle.load(open(directory_syspath + "/data/features_" + user_id + ".p", "rb"))) - #self.all_complex_trajectories = pickle.load(open("data/complex_trajectories.csv", "rb")) + self.weights = np.array(pickle.load(open(directory_syspath + "/data/weights_" + user_id + ".p", "rb"))) + self.features = np.array(pickle.load(open(directory_syspath + "/data/features_" + user_id + ".p", "rb"))) + self.task = np.array(pickle.load(open(directory_syspath + "/data/task_" + user_id + ".p", "rb"))) # actions in airplane assembly and objects required for each action self.all_user_actions = [0, 1, 2, 2, 2, 2, 3, 4, 4, 4, 4, 5, 6, 6, 6, 6, 7] @@ -243,6 +245,10 @@ def __init__(self): # objects yet to be delivered self.remaining_objects = list(self.objects.keys()) + # irl parameters + self.init = O.Constant(0.5) + self.optim = O.ExpSga(lr=O.linear_decay(lr0=0.5)) + # subscribe to action recognition sub_act = rospy.Subscriber("/april_tag_detection", Float64MultiArray, self.callback, queue_size=1) 
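Note on the hunk above: self.init and self.optim use the Constant initializer, ExpSga optimizer, and linear_decay schedule added in src/optimizer.py. A minimal sketch of how those pieces combine; the feature dimension of 3 and the gradient values are illustrative placeholders, not taken from this patch:

    import numpy as np
    import src.optimizer as O

    init = O.Constant(0.5)                        # every weight starts at 0.5
    optim = O.ExpSga(lr=O.linear_decay(lr0=0.5))  # lr(k) = 0.5 / (1 + floor(k)) with the default decay settings

    theta = init(3)                               # -> array([0.5, 0.5, 0.5])
    optim.reset(theta)                            # reset is required before the first step
    optim.step(np.array([0.2, -0.1, 0.0]))        # theta *= exp(lr * grad), updated in place

Because the ExpSga update is multiplicative, weights initialized at 0.5 via Constant stay positive throughout training.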
@@ -439,7 +445,7 @@ def update_application(self): if ret == QMessageBox.Cancel: self.new_features = [] - print(self.new_features) + #print(self.new_features) self.setNewFeature = False def calculate_anticipated_action(self, current_state, remaining_user_actions): @@ -482,12 +488,18 @@ def callback(self, data): current_state = self.current_state # if in the beginning of the program there is no prediction yet, update remaining user action according to user action detected - if len(self.anticipated_actions)==0: + if len(self.anticipated_actions)==0 and len(self.user_sequence) == 0: + # update action sequence + self.user_sequence = detected_sequence + self.time_step = len(self.user_sequence) for x in self.user_sequence: self.remaining_user_actions.remove(x) + print(self.states[430]) + + ### - FLAG = False + FLAG = True # compare self.user_sequence and detected_sequence to check for any newly added actions new_detected_sequence = [] @@ -495,70 +507,74 @@ def callback(self, data): new_detected_sequence = detected_sequence[len(self.user_sequence):] # check newly detected actions one by one with last anticipated_actions - count = 0 - for new_a in new_detected_sequence: - count += 1 - if not self.anticipated_actions[0] == new_a: - if FLAG: - # if different and the mismatch is due to weights need updating, update weights - #prev_weights = deepcopy(self.weights) - p, sp = common.transition(current_state, new_a) - - future_actions = deepcopy(self.remaining_user_actions) - future_actions.remove(new_a) - ro = rollout_trajectory(self.qf, self.states, common.transition, future_actions, self.states.index(sp)) - future_actions.append(new_a) - complex_user_demo = [detected_sequence[:step] + [new_a] + ro] - complex_trajectories = get_trajectories(self.states, complex_user_demo, common.transition) - - n_samples = 10 - weight_priors = np.ones(n_samples)/n_samples - new_samples = [] - posterior = [] - for n_sample in range(n_samples): - u = np.random.uniform(0., 1., 3) # dim(features) = dim(weights.T) - d = 1.0 # np.sum(u) # np.sum(u ** 2) ** 0.5 - complex_weights = u / d - likelihood_all_trajectories, _ = boltzman_likelihood(self.features, self.all_complex_trajectories, complex_weights) - likelihood_user_demo, r = boltzman_likelihood(self.features, complex_trajectories, complex_weights) - likelihood_user_demo = likelihood_user_demo / np.sum(likelihood_all_trajectories) - bayesian_update = (likelihood_user_demo[0] * weight_priors[n_sample]) - - new_samples.append(complex_weights) - posterior.append(bayesian_update) - - posterior = list(posterior / np.sum(posterior)) - max_posterior = max(posterior) - self.weights = new_samples[posterior.index(max_posterior)] - #print("Updated weights from", prev_weights, "to", self.weights) - - # compute new q values from new weights - rewards = self.features.dot(self.weights) - self.qf, _, _ = value_iteration(self.states, self.remaining_user_actions, common.transition, rewards, self.terminal_states) + for count, new_a in enumerate(new_detected_sequence): + if FLAG: + # if different and the mismatch is due to weights need updating, update weights + prev_weights = deepcopy(self.weights) + p, sp = common.transition(current_state, new_a) + + future_actions = deepcopy(self.remaining_user_actions) + future_actions.remove(new_a) + print(self.states.index(sp)) + ro = rollout_trajectory(self.qf, self.states, common.canonical_transition, future_actions, self.states.index(sp)) + future_actions.append(new_a) + complex_user_demo = [detected_sequence[:self.time_step + count] + [new_a] + ro] 
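+                        # the demonstration built above (observed prefix, the mismatched action, and a
+                        # greedy rollout of the remaining actions under the current Q-values) is converted
+                        # into state-action trajectories and used to re-estimate the weights below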
+ complex_trajectories = get_trajectories(self.states, complex_user_demo, common.canonical_transition) + + # # Bayesian approach + # n_samples = 10 + # weight_priors = np.ones(n_samples)/n_samples + # new_samples = [] + # posterior = [] + # for n_sample in range(n_samples): + # u = np.random.uniform(0., 1., 3) # dim(features) = dim(weights.T) + # d = 1.0 # np.sum(u) # np.sum(u ** 2) ** 0.5 + # complex_weights = u / d + # likelihood_all_trajectories, _ = boltzman_likelihood(self.features, self.all_complex_trajectories, complex_weights) + # likelihood_user_demo, r = boltzman_likelihood(self.features, complex_trajectories, complex_weights) + # likelihood_user_demo = likelihood_user_demo / np.sum(likelihood_all_trajectories) + # bayesian_update = (likelihood_user_demo[0] * weight_priors[n_sample]) + + # new_samples.append(complex_weights) + # posterior.append(bayesian_update) + + # posterior = list(posterior / np.sum(posterior)) + # max_posterior = max(posterior) + # self.weights = new_samples[posterior.index(max_posterior)] + + # Max entropy approach + _, new_weights = maxent_irl(self.task, self.features, complex_trajectories, self.optim, self.init) + self.weights = new_weights + + + # compute new q values from new weights + rewards = self.features.dot(self.weights) + self.qf, _, _ = value_iteration(self.states, self.remaining_user_actions, self.task.transition, rewards, self.task.terminal_idx) + print(np.array_equal(prev_weights, self.weights)) - else: - # is mismatch is due to new features, add pop up window to ask the user about which feature to add - self.setNewFeature = True + else: + # is mismatch is due to new features, add pop up window to ask the user about which feature to add + self.setNewFeature = True - while self.setNewFeature: - placeholder = 1 + while self.setNewFeature: + placeholder = 1 - # update q values - print(self.new_features) + # update q values + #print(self.new_features) - # for each iteration - # determine current state based on detected action sequence - current_state = self.states[0] - for user_action in self.user_sequence + new_detected_sequence[0:count]: - # for i in range(self.action_counts[user_action]): - p, next_state = common.transition(current_state, user_action) - current_state = next_state + # for each iteration + # determine current state based on detected action sequence + current_state = self.states[0] + for user_action in self.user_sequence + new_detected_sequence[0:count]: + # for i in range(self.action_counts[user_action]): + p, next_state = common.transition(current_state, user_action) + current_state = next_state - self.remaining_user_actions.remove(new_a) + self.remaining_user_actions.remove(new_a) - # ---------------------------------------- Anticipate next user action --------------------------------------- # - self.available_actions, self.anticipated_actions = self.calculate_anticipated_action(current_state, self.remaining_user_actions) + # ---------------------------------------- Anticipate next user action --------------------------------------- # + self.available_actions, self.anticipated_actions = self.calculate_anticipated_action(current_state, self.remaining_user_actions) # ------------------------------------ Anticipate next user action with new weights ------------------------------------ #
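Note on the weight-update path above: the callback follows the same pattern as online_predict_trajectory in irl.py — complete the observed demonstration with a greedy rollout, re-run MaxEnt IRL, and recompute the policy. A condensed sketch of that loop, assuming a task object exposing states, actions, transition, and terminal_idx as in src/assembly_tasks.py, and assuming rollout_trajectory, get_trajectories, and maxent_irl are importable from src.maxent_irl as implied by the imports added above; the helper name and argument list are illustrative, not part of this patch:

    from copy import deepcopy

    from src.vi import value_iteration
    from src.maxent_irl import maxent_irl, get_trajectories, rollout_trajectory

    def replan_after_mismatch(task, qf, features, optim, init,
                              demo_prefix, observed_action, state_idx, remaining_actions):
        # complete the demonstration: observed prefix, then the mismatched action,
        # then a greedy rollout of whatever actions remain under the current Q-values
        _, next_state = task.transition(task.states[state_idx], observed_action)
        future_actions = deepcopy(remaining_actions)
        future_actions.remove(observed_action)
        rollout = rollout_trajectory(qf, task.states, task.transition,
                                     future_actions, task.states.index(next_state))
        user_demo = [demo_prefix + [observed_action] + rollout]
        trajectories = get_trajectories(task.states, user_demo, task.transition)

        # re-estimate the reward weights with MaxEnt IRL and recompute the policy
        _, weights = maxent_irl(task, features, trajectories, optim, init)
        rewards = features.dot(weights)
        qf, _, _ = value_iteration(task.states, task.actions, task.transition,
                                   rewards, task.terminal_idx)
        return weights, qf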