-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
fixed tool for canonical, still working on updating weights
- Loading branch information
1 parent
0b55014
commit d19c811
Showing
25 changed files
with
1,615 additions
and
90 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,154 @@ | ||
# import python libraries | ||
import os | ||
import pdb | ||
import numpy as np | ||
from copy import deepcopy | ||
import pandas as pd | ||
import pickle | ||
|
||
# import functions | ||
import src.optimizer as O # stochastic gradient descent optimizer | ||
from src.vi import value_iteration | ||
from src.maxent_irl import * | ||
from src.assembly_tasks import * | ||
from src.import_qualtrics import get_qualtrics_survey | ||
|
||
# ----------------------------------------------- Load data ---------------------------------------------------- # | ||
|
||
# Fetch the learning survey into the data directory (download from qualtrics).
learning_survey_id = "SV_8eoX63z06ZhVZRA"
# NOTE(review): machine-specific absolute path; the trailing comment keeps the
# script-relative alternative that was left disabled.
data_path = "/home/icaros/ros_ws/src/ada_manipulation_demos/data/"  # os.path.dirname(__file__) + "/data/"
get_qualtrics_survey(survey_id=learning_survey_id, dir_save_survey=data_path)

# Read the exported user responses into a DataFrame.
# data_path already ends with a separator, so join() appends the file name only.
demo_path = os.path.join(data_path, "Human-Robot Assembly - Learning.csv")
df = pd.read_csv(demo_path)
|
||
|
||
# pre-process feature value
def process_val(x):
    """Convert one raw survey rating into a float.

    The two labelled scale endpoints are mapped to fixed values:
    ``"1 (No effort at all)"`` -> 1.1 and ``"7 (A lot of effort)"`` -> 6.9.
    Any other value is passed through ``float()``.

    Args:
        x: A rating cell, either one of the endpoint labels or something
            ``float()`` accepts (number or numeric string).

    Returns:
        The rating as a float.

    Raises:
        ValueError: If ``x`` is a string that is neither an endpoint label
            nor a valid number.
    """
    if isinstance(x, str):
        # Padded labels would otherwise miss the comparisons below and
        # make float() raise; float() itself already ignores padding.
        x = x.strip()

    # NOTE(review): 1.1 / 6.9 presumably keep the endpoints strictly inside
    # the 1-7 range - confirm the intent with the author.
    if x == "1 (No effort at all)":
        return 1.1
    if x == "7 (A lot of effort)":
        return 6.9
    return float(x)
|
||
|
||
# load user ratings
def load_features(data, user_idx, feature_idx, action_idx):
    """Collect one user's ratings as a per-action list of feature values.

    For each entry ``j`` of ``action_idx`` and each prefix ``k`` of
    ``feature_idx`` the cell ``data[k + str(j)][user_idx]`` is read and
    converted with ``process_val``.

    Args:
        data: Table indexable as ``data[column][row]``.
        user_idx: Row key of the user inside ``data``.
        feature_idx: Column-name prefixes, one per feature.
        action_idx: Column-name suffixes, one per action.

    Returns:
        A list with one inner list per action (in ``action_idx`` order), each
        holding one float per feature (in ``feature_idx`` order).
    """
    return [
        [process_val(data[prefix + str(action)][user_idx]) for prefix in feature_idx]
        for action in action_idx
    ]
|
||
|
||
# ----------------------------------------------- Optimization -------------------------------------------------- # | ||
|
||
# Parameter initialization strategy: every parameter starts at the constant 0.5.
init = O.Constant(0.5)

# Optimization strategy: exponentiated stochastic gradient descent whose
# learning rate follows a linear decay starting from lr0 = 0.5.
_lr_schedule = O.linear_decay(lr0=0.5)
optim = O.ExpSga(lr=_lr_schedule)
|
||
# --------------------------------------------- User information ------------------------------------------------ # | ||
|
||
# Switches read further down: convert task features to rankings / rescale the
# learned weights by their maximum.
rank_features = False
scale_weights = False

# Strip stray whitespace: the id is matched against the survey and is also
# embedded in the output file names.
user_id = input("Enter user id: ").strip()

print("=======================")
print("Calculating preference for user:", user_id)

# Locate the survey row whose Q1 answer equals the entered id.
user_rows = df.index[df['Q1'] == user_id]
if len(user_rows) == 0:
    # Same exception type the bare `[0]` lookup used to raise, but with a
    # message that says what actually went wrong.
    raise IndexError("No survey response found with Q1 == " + repr(user_id))
idx = user_rows[0]

# canonical_survey_actions[i] is the action paired with ranking column Q9_(i+1).
canonical_survey_actions = [0, 3, 1, 4, 2, 5]
preferred_order = [df[q][idx] for q in ['Q9_1', 'Q9_2', 'Q9_3', 'Q9_4', 'Q9_5', 'Q9_6']]
# Actions ordered by the user's ranking value (ties fall back to the action id).
canonical_demo = [a for _, a in sorted(zip(preferred_order, canonical_survey_actions))]

# user ratings for features
canonical_q, complex_q = ["Q6_", "Q7_"], ["Q13_", "Q14_"]
canonical_features = load_features(df, idx, canonical_q, [2, 4, 6, 3, 5, 7])
complex_features = load_features(df, idx, complex_q, [3, 8, 15, 16, 4, 9, 10, 11])
|
||
# ---------------------------------------- Training: Learn weights ---------------------------------------------- # | ||
|
||
# Set up the canonical task from the user's ratings; the demonstrated order
# defines its end state.
C = CanonicalTask(canonical_features)
C.set_end_state(canonical_demo)
C.enumerate_states()
C.set_terminal_idx()
if rank_features:
    C.convert_to_rankings()

# The single user demonstration, expanded into trajectories over C's states.
canonical_user_demo = [canonical_demo]
canonical_trajectories = get_trajectories(C.states, canonical_user_demo, C.transition)

print("Training ...")

# One feature row per enumerated state, each column divided by its norm.
abstract_features = np.array([C.get_features(s) for s in C.states])
norm_abstract_features = np.divide(abstract_features,
                                   np.linalg.norm(abstract_features, axis=0))
canonical_rewards_abstract, canonical_weights_abstract = maxent_irl(
    C, norm_abstract_features, canonical_trajectories, optim, init)

print("Weights have been learned for the canonical task! Fingers X-ed.")
print("Weights -", canonical_weights_abstract)

# Optional rescaling by the largest weight (done after the weights are printed).
if scale_weights:
    canonical_weights_abstract /= max(canonical_weights_abstract)
|
||
# ----------------------------------------- Testing: Predict complex -------------------------------------------- # | ||
sample_complex_demo = [1, 3, 5, 0, 2, 2, 2, 2, 4, 4, 4, 4, 6, 6, 6, 6, 7]

# complex_survey_actions[i] is the action paired with ranking column Q15_(i+1);
# action_counts[a] is how many times action a is repeated in the demonstration.
complex_survey_actions = [0, 4, 1, 5, 6, 7, 2, 3]
action_counts = [1, 1, 4, 1, 4, 1, 4, 1]
preferred_order = [df[q][idx] for q in ['Q15_1', 'Q15_2', 'Q15_3', 'Q15_4', 'Q15_5', 'Q15_6', 'Q15_7', 'Q15_8']]
complex_demo = []
for _, a in sorted(zip(preferred_order, complex_survey_actions)):
    complex_demo += [a] * action_counts[a]

# initialize complex task
# NOTE(review): the end state comes from sample_complex_demo, not from the
# user's complex_demo built above - confirm this is intended.
X = ComplexTask(complex_features)
X.set_end_state(sample_complex_demo)
X.enumerate_states()
X.set_terminal_idx()
if rank_features:
    X.convert_to_rankings()

# using abstract features
complex_abstract_features = np.array([X.get_features(state) for state in X.states])
# Out-of-place division (as for the canonical features): an in-place `/=`
# raises on integer arrays because the float result cannot be cast back.
complex_abstract_features = complex_abstract_features / np.linalg.norm(complex_abstract_features, axis=0)

# transfer rewards to complex task
transfer_rewards_abstract = complex_abstract_features.dot(canonical_weights_abstract)

# compute q-values for each state based on learned weights
qf_transfer, _, _ = value_iteration(X.states, X.actions, X.transition, transfer_rewards_abstract, X.terminal_idx)

# score for predicting the action based on transferred rewards based on abstract features
# predict_sequence, predict_score = predict_trajectory(qf_transfer, X.states, [complex_demo], X.transition,
#                                                      sensitivity=0.0, consider_options=False)

print("canonical : ", canonical_demo)
print("preference: ", complex_demo)

# save_path = data_path + "learned_models/"
# Each artifact goes to data_path + <prefix> + user_id + ".p"; the context
# manager closes (and flushes) every file even if pickling fails.
_artifacts = [
    ("weights_", canonical_weights_abstract),
    ("task_", X),
    ("features_", complex_abstract_features),
    ("q_values_", qf_transfer),
    ("states_", X.states),
]
for _prefix, _obj in _artifacts:
    with open(data_path + _prefix + user_id + ".p", "wb") as _out:
        pickle.dump(_obj, _out)
print("Q-values have been saved for user " + user_id + ".")
Oops, something went wrong.