fixed tool for canonical, still working on updating weights
herambnemlekar committed May 18, 2022
1 parent 0b55014 commit d19c811
Showing 25 changed files with 1,615 additions and 90 deletions.
15 changes: 13 additions & 2 deletions canonical_assembly_v2.py
@@ -315,7 +315,7 @@ def callback(self, data):
         self.time_step = len(self.user_sequence)
 
         # update remaining parts
-        self.remaining_objects = [rem_obj for rem_obj in self.remaining_objects if rem_obj not in detected_parts]
+        self.remaining_objects = [rem_obj for rem_obj in list(self.objects.keys()) if rem_obj not in detected_parts]
 
 
     def deliver_part(self):
@@ -492,7 +492,18 @@ def deliver_part(self):
             else:
                 # ------------------- Move container back to original place if not boxes ------------------- #
                 # wait for user to grab item
-                time.sleep(3)
+                if chosen_obj == "tool":
+                    # hold the tool box until the user picks up the tool and puts it back
+                    while "tool" in self.remaining_objects:
+                        time.sleep(0.1)
+                    print("picked up tool")
+                    time.sleep(0.5)
+                    while "tool" not in self.remaining_objects:
+                        time.sleep(0.5)
+                        print("tool is out")
+                    print("tool put back")
+                else:
+                    time.sleep(3)
 
             if chosen_obj in ["long bolts", "short bolts", "long wire"]:
                 # print("smoother turn")
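The added branch busy-waits on self.remaining_objects, which the perception callback above rebuilds from the detected parts on every cycle, so the arm holds the tool box through the user's full pick-use-return cycle. A minimal standalone sketch of the same polling pattern, with a hypothetical get_remaining callable and a timeout added so the wait cannot hang forever:

import time

def wait_for_tool_cycle(get_remaining, poll=0.1, timeout=60.0):
    # get_remaining is a hypothetical callable returning the currently
    # detected part names; the commit reads self.remaining_objects directly
    start = time.time()
    # phase 1: tool still detected, so the user has not picked it up yet
    while "tool" in get_remaining() and time.time() - start < timeout:
        time.sleep(poll)
    print("picked up tool")
    start = time.time()
    # phase 2: tool missing, so the user is working with it; wait for its return
    while "tool" not in get_remaining() and time.time() - start < timeout:
        time.sleep(poll)
    print("tool put back")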
51 changes: 50 additions & 1 deletion common.py
@@ -73,4 +73,53 @@ def transition(s_from, a):
         s_to[-2] = a
         return p, s_to
     else:
-        return p, None
+        return p, None
+
+
+def back_transition(s_to, a):
+    # preconditions
+    if s_to[a] > 0:
+        if a == 0 and s_to[2] < 1:
+            p = 1.0
+        elif a == 1 and s_to[3] < 1:
+            p = 1.0
+        elif a in [2, 3] and s_to[a] > s_to[a + 2]:
+            p = 1.0
+        elif a in [6] and s_to[a + 1] < 1:
+            p = 1.0
+        elif a in [4, 5, 7]:
+            p = 1.0
+        else:
+            p = 0.0
+    else:
+        p = 0.0
+
+    # transition to previous state
+    if p == 1.0:
+        s_from = deepcopy(s_to)
+        s_from[a] -= 1
+        return p, s_from
+    else:
+        return p, None
+
+def canonical_transition(s_from, a):
+    # preconditions
+    if s_from[a] < 1:
+        if a in [0, 1, 2, 5]:
+            prob = 1.0
+        elif a in [3, 4] and s_from[a - 3] == 1:
+            prob = 1.0
+        else:
+            prob = 0.0
+    else:
+        prob = 0.0
+
+    # transition to next state
+    if prob == 1.0:
+        s_to = deepcopy(s_from)
+        s_to[a] += 1
+        s_to[-1] = s_from[-2]
+        s_to[-2] = a
+        return prob, s_to
+    else:
+        return prob, None
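In both functions a state is a vector of per-action counts whose last two entries cache the two most recent actions (s_to[-1] = s_from[-2]; s_to[-2] = a). canonical_transition permits each of the six canonical actions once, with actions 3 and 4 gated on actions 0 and 1 having been done (s_from[a - 3] == 1); back_transition undoes one action, returning the predecessor state. A short sketch, under an assumed start-state layout, of the reachable-state enumeration that the Task classes' enumerate_states() presumably performs:

from common import canonical_transition  # the function added above

def enumerate_canonical_states(n_actions=6):
    # assumed state layout: one count per action plus two history slots,
    # initialized here with a -1 sentinel (the repo's Task classes presumably
    # build this start state inside enumerate_states)
    start = [0] * n_actions + [-1, -1]
    seen = {tuple(start)}
    frontier = [start]
    while frontier:
        s = frontier.pop()
        for a in range(n_actions):
            p, s_next = canonical_transition(s, a)
            if p == 1.0 and tuple(s_next) not in seen:
                seen.add(tuple(s_next))
                frontier.append(s_next)
    return [list(s) for s in seen]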
154 changes: 154 additions & 0 deletions compute_weights.py
@@ -0,0 +1,154 @@
# import python libraries
import os
import pdb
import numpy as np
from copy import deepcopy
import pandas as pd
import pickle

# import functions
import src.optimizer as O # stochastic gradient descent optimizer
from src.vi import value_iteration
from src.maxent_irl import *
from src.assembly_tasks import *
from src.import_qualtrics import get_qualtrics_survey

# ----------------------------------------------- Load data ---------------------------------------------------- #

# download data from qualtrics
learning_survey_id = "SV_8eoX63z06ZhVZRA"
data_path = "/home/icaros/ros_ws/src/ada_manipulation_demos/data/" # os.path.dirname(__file__) + "/data/"
get_qualtrics_survey(dir_save_survey=data_path, survey_id=learning_survey_id)

# load user data
demo_path = data_path + "Human-Robot Assembly - Learning.csv"
df = pd.read_csv(demo_path)


# pre-process feature value
def process_val(x):
    if x == "1 (No effort at all)":
        x = 1.1
    elif x == "7 (A lot of effort)":
        x = 6.9
    else:
        x = float(x)

    return x


# load user ratings
def load_features(data, user_idx, feature_idx, action_idx):
    fea_mat = []
    for j in action_idx:
        fea_vec = []
        for k in feature_idx:
            fea_col = k + str(j)
            fea_val = process_val(data[fea_col][user_idx])
            fea_vec.append(fea_val)
        fea_mat.append(fea_vec)
    return fea_mat
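# example: with feature_idx=["Q6_", "Q7_"] and action_idx=[2, 4, 6, 3, 5, 7]
# (as below), this reads survey columns "Q6_2", "Q7_2", "Q6_4", "Q7_4", ...
# and returns one row of effort ratings per action, one column per question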


# ----------------------------------------------- Optimization -------------------------------------------------- #

# choose our parameter initialization strategy:
# initialize parameters with constant
init = O.Constant(0.5)

# choose our optimization strategy:
# we select exponentiated stochastic gradient descent with linear learning-rate decay
optim = O.ExpSga(lr=O.linear_decay(lr0=0.5))

# --------------------------------------------- User information ------------------------------------------------ #

rank_features = False
scale_weights = False

user_id = input("Enter user id: ")

print("=======================")
print("Calculating preference for user:", user_id)

idx = df.index[df['Q1'] == user_id][0]
canonical_survey_actions = [0, 3, 1, 4, 2, 5]
preferred_order = [df[q][idx] for q in ['Q9_1', 'Q9_2', 'Q9_3', 'Q9_4', 'Q9_5', 'Q9_6']]
canonical_demo = [a for _, a in sorted(zip(preferred_order, canonical_survey_actions))]
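# sorting (rank, action) pairs by rank orders the six canonical actions into
# the sequence the user said they would prefer to perform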

# user ratings for features
canonical_q, complex_q = ["Q6_", "Q7_"], ["Q13_", "Q14_"]
canonical_features = load_features(df, idx, canonical_q, [2, 4, 6, 3, 5, 7])
complex_features = load_features(df, idx, complex_q, [3, 8, 15, 16, 4, 9, 10, 11])

# ---------------------------------------- Training: Learn weights ---------------------------------------------- #

# initialize canonical task
C = CanonicalTask(canonical_features)
C.set_end_state(canonical_demo)
C.enumerate_states()
C.set_terminal_idx()
if rank_features:
    C.convert_to_rankings()

# demonstrations
canonical_user_demo = [canonical_demo]
canonical_trajectories = get_trajectories(C.states, canonical_user_demo, C.transition)

print("Training ...")

# using abstract features
abstract_features = np.array([C.get_features(state) for state in C.states])
norm_abstract_features = abstract_features / np.linalg.norm(abstract_features, axis=0)
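# maxent_irl (from src.maxent_irl) fits reward weights so the user's canonical
# demonstration becomes maximally likely under the normalized abstract features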
canonical_rewards_abstract, canonical_weights_abstract = maxent_irl(C, norm_abstract_features,
                                                                    canonical_trajectories,
                                                                    optim, init)

print("Weights have been learned for the canonical task! Fingers X-ed.")
print("Weights -", canonical_weights_abstract)

# scale weights
if scale_weights:
    canonical_weights_abstract /= max(canonical_weights_abstract)

# ----------------------------------------- Testing: Predict complex -------------------------------------------- #
sample_complex_demo = [1, 3, 5, 0, 2, 2, 2, 2, 4, 4, 4, 4, 6, 6, 6, 6, 7]

complex_survey_actions = [0, 4, 1, 5, 6, 7, 2, 3]
action_counts = [1, 1, 4, 1, 4, 1, 4, 1]
preferred_order = [df[q][idx] for q in ['Q15_1', 'Q15_2', 'Q15_3', 'Q15_4', 'Q15_5', 'Q15_6', 'Q15_7', 'Q15_8']]
complex_demo = []
for _, a in sorted(zip(preferred_order, complex_survey_actions)):
    complex_demo += [a] * action_counts[a]
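# each chosen action is repeated per its part count, e.g. actions 2, 4, and 6
# (count 4) appear four times each, matching sample_complex_demo above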

# initialize complex task
X = ComplexTask(complex_features)
X.set_end_state(sample_complex_demo)
X.enumerate_states()
X.set_terminal_idx()
if rank_features:
    X.convert_to_rankings()

# using abstract features
complex_abstract_features = np.array([X.get_features(state) for state in X.states])
complex_abstract_features /= np.linalg.norm(complex_abstract_features, axis=0)

# transfer rewards to complex task
transfer_rewards_abstract = complex_abstract_features.dot(canonical_weights_abstract)

# compute q-values for each state based on learned weights
qf_transfer, _, _ = value_iteration(X.states, X.actions, X.transition, transfer_rewards_abstract, X.terminal_idx)
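# qf_transfer holds, for each state, the estimated value of each action under
# the transferred rewards; the commented-out predict_trajectory call below
# would score the user's complex demo against these q-values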

# score for predicting the user's actions from the rewards transferred via abstract features
# predict_sequence, predict_score = predict_trajectory(qf_transfer, X.states, [complex_demo], X.transition,
#                                                      sensitivity=0.0, consider_options=False)

print("canonical : ", canonical_demo)
print("preference: ", complex_demo)

# save_path = data_path + "learned_models/"
pickle.dump(canonical_weights_abstract, open(data_path + "weights_" + user_id + ".p", "wb"))
pickle.dump(X, open(data_path + "task_" + user_id + ".p", "wb"))
pickle.dump(complex_abstract_features, open(data_path + "features_" + user_id + ".p", "wb"))
pickle.dump(qf_transfer, open(data_path + "q_values_" + user_id + ".p", "wb"))
pickle.dump(X.states, open(data_path + "states_" + user_id + ".p", "wb"))
print("Q-values have been saved for user " + user_id + ".")