fixed tool for canonical, still working on updating weights
herambnemlekar committed May 18, 2022
1 parent 0b55014 commit d19c811
Showing 25 changed files with 1,615 additions and 90 deletions.
15 changes: 13 additions & 2 deletions canonical_assembly_v2.py
@@ -315,7 +315,7 @@ def callback(self, data):
         self.time_step = len(self.user_sequence)
 
         # update remaining parts
-        self.remaining_objects = [rem_obj for rem_obj in self.remaining_objects if rem_obj not in detected_parts]
+        self.remaining_objects = [rem_obj for rem_obj in list(self.objects.keys()) if rem_obj not in detected_parts]
 
 
     def deliver_part(self):
@@ -492,7 +492,18 @@ def deliver_part(self):
             else:
                 # ------------------- Move container back to original place if not boxes ------------------- #
                 # wait for user to grab item
-                time.sleep(3)
+                if chosen_obj == "tool":
+                    # hold the tool box until the user picks up the tool and puts it back
+                    while "tool" in self.remaining_objects:
+                        time.sleep(0.1)
+                    print("picked up tool")
+                    time.sleep(0.5)
+                    while "tool" not in self.remaining_objects:
+                        time.sleep(0.5)
+                        print("tool is out")
+                    print("tool put back")
+                else:
+                    time.sleep(3)
 
             if chosen_obj in ["long bolts", "short bolts", "long wire"]:
                 # print("smoother turn")
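The added branch busy-waits on self.remaining_objects, which the perception callback above rebuilds from the detected parts on every cycle, so the arm holds the tool box through the user's full pick-use-return cycle. A minimal standalone sketch of the same polling pattern, with a hypothetical get_remaining callable and a timeout added so the wait cannot hang forever:

import time

def wait_for_tool_cycle(get_remaining, poll=0.1, timeout=60.0):
    # get_remaining is a hypothetical callable returning the currently
    # detected part names; the commit reads self.remaining_objects directly
    start = time.time()
    # phase 1: tool still detected, so the user has not picked it up yet
    while "tool" in get_remaining() and time.time() - start < timeout:
        time.sleep(poll)
    print("picked up tool")
    start = time.time()
    # phase 2: tool missing, so the user is working with it; wait for its return
    while "tool" not in get_remaining() and time.time() - start < timeout:
        time.sleep(poll)
    print("tool put back")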
51 changes: 50 additions & 1 deletion common.py
@@ -73,4 +73,53 @@ def transition(s_from, a):
         s_to[-2] = a
         return p, s_to
     else:
-        return p, None
+        return p, None
+
+
+def back_transition(s_to, a):
+    # preconditions
+    if s_to[a] > 0:
+        if a == 0 and s_to[2] < 1:
+            p = 1.0
+        elif a == 1 and s_to[3] < 1:
+            p = 1.0
+        elif a in [2, 3] and s_to[a] > s_to[a + 2]:
+            p = 1.0
+        elif a in [6] and s_to[a + 1] < 1:
+            p = 1.0
+        elif a in [4, 5, 7]:
+            p = 1.0
+        else:
+            p = 0.0
+    else:
+        p = 0.0
+
+    # transition to previous state
+    if p == 1.0:
+        s_from = deepcopy(s_to)
+        s_from[a] -= 1
+        return p, s_from
+    else:
+        return p, None
+
+def canonical_transition(s_from, a):
+    # preconditions
+    if s_from[a] < 1:
+        if a in [0, 1, 2, 5]:
+            prob = 1.0
+        elif a in [3, 4] and s_from[a - 3] == 1:
+            prob = 1.0
+        else:
+            prob = 0.0
+    else:
+        prob = 0.0
+
+    # transition to next state
+    if prob == 1.0:
+        s_to = deepcopy(s_from)
+        s_to[a] += 1
+        s_to[-1] = s_from[-2]
+        s_to[-2] = a
+        return prob, s_to
+    else:
+        return prob, None
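In both functions a state is a vector of per-action counts whose last two entries cache the two most recent actions (s_to[-1] = s_from[-2]; s_to[-2] = a). canonical_transition permits each of the six canonical actions once, with actions 3 and 4 gated on actions 0 and 1 having been done (s_from[a - 3] == 1); back_transition undoes one action, returning the predecessor state. A short sketch, under an assumed start-state layout, of the reachable-state enumeration that the Task classes' enumerate_states() presumably performs:

from common import canonical_transition  # the function added above

def enumerate_canonical_states(n_actions=6):
    # assumed state layout: one count per action plus two history slots,
    # initialized here with a -1 sentinel (the repo's Task classes presumably
    # build this start state inside enumerate_states)
    start = [0] * n_actions + [-1, -1]
    seen = {tuple(start)}
    frontier = [start]
    while frontier:
        s = frontier.pop()
        for a in range(n_actions):
            p, s_next = canonical_transition(s, a)
            if p == 1.0 and tuple(s_next) not in seen:
                seen.add(tuple(s_next))
                frontier.append(s_next)
    return [list(s) for s in seen]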
154 changes: 154 additions & 0 deletions compute_weights.py
@@ -0,0 +1,154 @@
# import python libraries
import os
import pdb
import numpy as np
from copy import deepcopy
import pandas as pd
import pickle

# import functions
import src.optimizer as O # stochastic gradient descent optimizer
from src.vi import value_iteration
from src.maxent_irl import *
from src.assembly_tasks import *
from src.import_qualtrics import get_qualtrics_survey

# ----------------------------------------------- Load data ---------------------------------------------------- #

# download data from qualtrics
learning_survey_id = "SV_8eoX63z06ZhVZRA"
data_path = "/home/icaros/ros_ws/src/ada_manipulation_demos/data/" # os.path.dirname(__file__) + "/data/"
get_qualtrics_survey(dir_save_survey=data_path, survey_id=learning_survey_id)

# load user data
demo_path = data_path + "Human-Robot Assembly - Learning.csv"
df = pd.read_csv(demo_path)


# pre-process feature value
def process_val(x):
    if x == "1 (No effort at all)":
        x = 1.1
    elif x == "7 (A lot of effort)":
        x = 6.9
    else:
        x = float(x)

    return x


# load user ratings
def load_features(data, user_idx, feature_idx, action_idx):
    fea_mat = []
    for j in action_idx:
        fea_vec = []
        for k in feature_idx:
            fea_col = k + str(j)
            fea_val = process_val(data[fea_col][user_idx])
            fea_vec.append(fea_val)
        fea_mat.append(fea_vec)
    return fea_mat
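# example: with feature_idx=["Q6_", "Q7_"] and action_idx=[2, 4, 6, 3, 5, 7]
# (as below), this reads survey columns "Q6_2", "Q7_2", "Q6_4", "Q7_4", ...
# and returns one row of effort ratings per action, one column per question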


# ----------------------------------------------- Optimization -------------------------------------------------- #

# choose our parameter initialization strategy:
# initialize parameters with constant
init = O.Constant(0.5)

# choose our optimization strategy:
# we select exponentiated stochastic gradient descent with linear learning-rate decay
optim = O.ExpSga(lr=O.linear_decay(lr0=0.5))

# --------------------------------------------- User information ------------------------------------------------ #

rank_features = False
scale_weights = False

user_id = input("Enter user id: ")

print("=======================")
print("Calculating preference for user:", user_id)

idx = df.index[df['Q1'] == user_id][0]
canonical_survey_actions = [0, 3, 1, 4, 2, 5]
preferred_order = [df[q][idx] for q in ['Q9_1', 'Q9_2', 'Q9_3', 'Q9_4', 'Q9_5', 'Q9_6']]
canonical_demo = [a for _, a in sorted(zip(preferred_order, canonical_survey_actions))]
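# sorting (rank, action) pairs by rank orders the six canonical actions into
# the sequence the user said they would prefer to perform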

# user ratings for features
canonical_q, complex_q = ["Q6_", "Q7_"], ["Q13_", "Q14_"]
canonical_features = load_features(df, idx, canonical_q, [2, 4, 6, 3, 5, 7])
complex_features = load_features(df, idx, complex_q, [3, 8, 15, 16, 4, 9, 10, 11])

# ---------------------------------------- Training: Learn weights ---------------------------------------------- #

# initialize canonical task
C = CanonicalTask(canonical_features)
C.set_end_state(canonical_demo)
C.enumerate_states()
C.set_terminal_idx()
if rank_features:
    C.convert_to_rankings()

# demonstrations
canonical_user_demo = [canonical_demo]
canonical_trajectories = get_trajectories(C.states, canonical_user_demo, C.transition)

print("Training ...")

# using abstract features
abstract_features = np.array([C.get_features(state) for state in C.states])
norm_abstract_features = abstract_features / np.linalg.norm(abstract_features, axis=0)
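# maxent_irl (from src.maxent_irl) fits reward weights so the user's canonical
# demonstration becomes maximally likely under the normalized abstract features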
canonical_rewards_abstract, canonical_weights_abstract = maxent_irl(C, norm_abstract_features,
                                                                    canonical_trajectories,
                                                                    optim, init)

print("Weights have been learned for the canonical task! Fingers X-ed.")
print("Weights -", canonical_weights_abstract)

# scale weights
if scale_weights:
    canonical_weights_abstract /= max(canonical_weights_abstract)

# ----------------------------------------- Testing: Predict complex -------------------------------------------- #
sample_complex_demo = [1, 3, 5, 0, 2, 2, 2, 2, 4, 4, 4, 4, 6, 6, 6, 6, 7]

complex_survey_actions = [0, 4, 1, 5, 6, 7, 2, 3]
action_counts = [1, 1, 4, 1, 4, 1, 4, 1]
preferred_order = [df[q][idx] for q in ['Q15_1', 'Q15_2', 'Q15_3', 'Q15_4', 'Q15_5', 'Q15_6', 'Q15_7', 'Q15_8']]
complex_demo = []
for _, a in sorted(zip(preferred_order, complex_survey_actions)):
    complex_demo += [a] * action_counts[a]
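# each chosen action is repeated per its part count, e.g. actions 2, 4, and 6
# (count 4) appear four times each, matching sample_complex_demo above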

# initialize complex task
X = ComplexTask(complex_features)
X.set_end_state(sample_complex_demo)
X.enumerate_states()
X.set_terminal_idx()
if rank_features:
    X.convert_to_rankings()

# using abstract features
complex_abstract_features = np.array([X.get_features(state) for state in X.states])
complex_abstract_features /= np.linalg.norm(complex_abstract_features, axis=0)

# transfer rewards to complex task
transfer_rewards_abstract = complex_abstract_features.dot(canonical_weights_abstract)

# compute q-values for each state based on learned weights
qf_transfer, _, _ = value_iteration(X.states, X.actions, X.transition, transfer_rewards_abstract, X.terminal_idx)
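# qf_transfer holds, for each state, the estimated value of each action under
# the transferred rewards; the commented-out predict_trajectory call below
# would score the user's complex demo against these q-values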

# score for predicting the user's actions from the rewards transferred via abstract features
# predict_sequence, predict_score = predict_trajectory(qf_transfer, X.states, [complex_demo], X.transition,
#                                                      sensitivity=0.0, consider_options=False)

print("canonical : ", canonical_demo)
print("preference: ", complex_demo)

# save_path = data_path + "learned_models/"
pickle.dump(canonical_weights_abstract, open(data_path + "weights_" + user_id + ".p", "wb"))
pickle.dump(X, open(data_path + "task_" + user_id + ".p", "wb"))
pickle.dump(complex_abstract_features, open(data_path + "features_" + user_id + ".p", "wb"))
pickle.dump(qf_transfer, open(data_path + "q_values_" + user_id + ".p", "wb"))
pickle.dump(X.states, open(data_path + "states_" + user_id + ".p", "wb"))
print("Q-values have been saved for user " + user_id + ".")