2 changes: 1 addition & 1 deletion elf/utils_elf.py
@@ -152,7 +152,7 @@ def copy_from(self, src, batch_key=""):
             elif isinstance(v, (int, float)):
                 bk.fill_(v)
             else:
-                bk[:] = v
+                bk[:] = v.view_as(bk[:])

         else:
             raise ValueError("Batch[%s]: \"%s\" in reply is missing in batch specification" % (batch_key, k))
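The view_as call is needed because a reply tensor from the model does not always share the exact shape of the pre-allocated batch slot (for example a missing or extra trailing dimension of size 1); view_as reshapes it to the slot's shape before the in-place copy. A minimal sketch of the pattern, using made-up shapes rather than the real Batch class:

    import torch

    bk = torch.zeros(4, 1)   # pre-allocated batch slot, shape (4, 1)
    v = torch.randn(4)       # reply from the model, shape (4,)

    # bk[:] = v would fail: (4,) does not broadcast into (4, 1).
    # view_as reshapes the reply to the slot's shape first.
    bk[:] = v.view_as(bk[:])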
2 changes: 1 addition & 1 deletion rlpytorch/methods/actor_critic.py
@@ -72,5 +72,5 @@ def update(self, mi, batch, stats):
         err = add_err(err, policy_err)
         err = add_err(err, self.value_matcher.feed({ value_node: V, "target" : R}, stats))

-        stats["cost"].feed(err.data[0] / (T - 1))
+        stats["cost"].feed(err.item() / (T - 1))
         err.backward()
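The .data[0] to .item() changes in this and the following files reflect the PyTorch 0.4 API: a reduced loss is now a 0-dimensional tensor, indexing it with [0] is no longer allowed, and .item() is the supported way to pull out the Python number. A small standalone illustration (the tensors here are placeholders, not the real batch):

    import torch
    import torch.nn.functional as F

    pred = torch.randn(8, 1, requires_grad=True)
    err = F.mse_loss(pred, torch.zeros(8, 1))   # 0-dim loss tensor

    cost = err.item()     # plain Python float, safe to log or divide
    err.backward()        # the autograd graph is untouched by .item()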
6 changes: 3 additions & 3 deletions rlpytorch/methods/discounted_reward.py
@@ -24,7 +24,7 @@ def __init__(self):
     def setR(self, R, stats):
         ''' Set rewards and feed to stats'''
         self.R = R
-        stats["init_reward"].feed(R.mean())
+        stats["init_reward"].feed(R.mean().item())

     def feed(self, batch, stats):
         '''
@@ -49,7 +49,7 @@ def feed(self, batch, stats):
             if terminal:
                 self.R[i] = r[i]

-        stats["reward"].feed(r.mean())
-        stats["acc_reward"].feed(self.R.mean())
+        stats["reward"].feed(r.mean().item())
+        stats["acc_reward"].feed(self.R.mean().item())

         return self.R
8 changes: 4 additions & 4 deletions rlpytorch/methods/policy_gradient.py
@@ -149,14 +149,14 @@ def feed(self, Q, pi_s, actions, stats, old_pi_s=dict()):
             entropy_err = add_err(entropy_err, errs["entropy_err"])
             log_pi_s.append(errs["logpi"])

-            stats["nll_" + pi_node].feed(errs["policy_err"].data[0])
-            stats["entropy_" + pi_node].feed(errs["entropy_err"].data[0])
+            stats["nll_" + pi_node].feed(errs["policy_err"].item())
+            stats["entropy_" + pi_node].feed(errs["entropy_err"].item())

         for log_pi in log_pi_s:
             self._reg_backward(log_pi, Variable(pg_weights))

         if len(args.policy_action_nodes) > 1:
-            stats["total_nll"].feed(policy_err.data[0])
-            stats["total_entropy"].feed(entropy_err.data[0])
+            stats["total_nll"].feed(policy_err.item())
+            stats["total_entropy"].feed(entropy_err.item())

         return policy_err + entropy_err * args.entropy_ratio
4 changes: 2 additions & 2 deletions rlpytorch/methods/value_matcher.py
@@ -68,7 +68,7 @@ def feed(self, batch, stats):
         V = batch[self.value_node]
         value_err = self.value_loss(V, Variable(batch["target"]))
         self._reg_backward(V)
-        stats["predicted_" + self.value_node].feed(V.data[0])
-        stats[self.value_node + "_err"].feed(value_err.data[0])
+        stats["predicted_" + self.value_node].feed(V[0].item())
+        stats[self.value_node + "_err"].feed(value_err.item())

        return value_err
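Note that V holds one predicted value per sample, so the old V.data[0] becomes V[0].item() rather than V.item(): .item() only works on single-element tensors and would raise on a full batch. A quick sketch with a placeholder tensor:

    import torch

    V = torch.randn(64)        # one predicted value per batch entry
    first = V[0].item()        # fine: a single element becomes a float
    # V.item() would raise, since V has more than one element.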
2 changes: 1 addition & 1 deletion rlpytorch/model_base.py
@@ -55,7 +55,7 @@ def _var(self, x):
         Variable for x
         '''
         if not isinstance(x, Variable):
-            return Variable(x, volatile=self.volatile)
+            return Variable(x)
         else:
             return x

2 changes: 1 addition & 1 deletion rlpytorch/model_interface.py
@@ -95,7 +95,7 @@ def add_model(self, key, model, copy=False, cuda=False, gpu_id=None, opt=False,
         self.models[key] = model.clone() if copy else model
         if cuda:
             if gpu_id is not None:
-                self.models[key].cuda(device_id=gpu_id)
+                self.models[key].cuda(device=gpu_id)
             else:
                 self.models[key].cuda()

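The device_id to device rename matches the current nn.Module.cuda signature, which takes the target GPU index as device. A minimal sketch with a throwaway module, assuming gpu_id is a plain integer index:

    import torch
    import torch.nn as nn

    model = nn.Linear(4, 2)
    gpu_id = 0
    if torch.cuda.is_available():
        model.cuda(device=gpu_id)   # older PyTorch accepted device_id=...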
2 changes: 1 addition & 1 deletion rlpytorch/model_loader.py
@@ -92,7 +92,7 @@ def load_model(self, params):
             sys.exit(1)

         if args.gpu is not None and args.gpu >= 0:
-            model.cuda(device_id=args.gpu)
+            model.cuda(device=args.gpu)

         return model

6 changes: 3 additions & 3 deletions rlpytorch/stats/stats.py
@@ -77,9 +77,9 @@ def feed_batch(self, batch, hist_idx=0):
         last_r = batch["last_r"][hist_idx]

         for batch_idx, (id, last_terminal) in enumerate(zip(ids, last_terminals)):
-            self.feed(id, last_r[batch_idx])
-            if last_terminal:
-                self.terminal(id)
+            self.feed(id.item(), last_r[batch_idx].item())
+            if last_terminal.item() == 1:
+                self.terminal(id.item())

 class RewardCount(EvalCount):
     ''' Class to accumulate rewards achieved'''
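Iterating over a tensor, as zip does here with ids and last_terminals, yields 0-dimensional tensors, so .item() converts each element to a plain Python number before it is used as a key or fed to stats; the explicit == 1 check likewise compares a plain int instead of truth-testing a tensor. A toy version of the loop with placeholder data:

    import torch

    ids = torch.tensor([3, 7, 9])
    terminals = torch.tensor([0, 1, 0])

    for id, term in zip(ids, terminals):
        key = id.item()            # Python int, usable as a dict key
        done = (term.item() == 1)  # plain bool instead of a tensor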
6 changes: 3 additions & 3 deletions rlpytorch/trainer/trainer.py
@@ -6,6 +6,7 @@

 import os
 import sys
+import torch
 sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..', '..', 'elf'))
 import utils_elf
 from ..args_provider import ArgsProvider
@@ -69,9 +70,8 @@ def actor(self, batch):

         # actor model.
         m = self.mi[self.actor_name]
-        m.set_volatile(True)
-        state_curr = m.forward(batch.hist(0))
-        m.set_volatile(False)
+        with torch.no_grad():
+            state_curr = m.forward(batch.hist(0))

         if self.sampler is not None:
             reply_msg = self.sampler.sample(state_curr)
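The set_volatile(True)/set_volatile(False) pair is replaced by torch.no_grad(), the PyTorch 0.4+ way to run inference without recording an autograd graph (the volatile flag those helpers relied on was removed). A standalone sketch of the pattern, with a dummy module standing in for the actor model:

    import torch
    import torch.nn as nn

    model = nn.Linear(16, 4)       # stand-in for the actor model
    x = torch.randn(8, 16)

    # No graph is recorded inside the block, so inference is cheaper
    # and the output does not require grad.
    with torch.no_grad():
        state_curr = model(x)

    assert not state_curr.requires_grad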