1010
1111
1212class MetaCurriculum (object ):
13- """A MetaCurriculum holds curriculums. Each curriculum is associated to a particular
14- brain in the environment.
13+ """A MetaCurriculum holds curriculums. Each curriculum is associated with a
14+ particular brain in the environment.
1515 """
1616
1717 def __init__ (self , curriculum_folder , default_reset_parameters ):
@@ -33,10 +33,12 @@ def __init__(self, curriculum_folder, default_reset_parameters):
3333 brain_name = curriculum_filename .split ('.' )[0 ]
3434 curriculum_filepath = \
3535 os .path .join (curriculum_folder , curriculum_filename )
36- curriculum = Curriculum (curriculum_filepath , default_reset_parameters )
36+ curriculum = Curriculum (curriculum_filepath ,
37+ default_reset_parameters )
3738
3839 # Check if any two curriculums use the same reset params.
39- if any ([(parameter in curriculum .get_config ().keys ()) for parameter in used_reset_parameters ]):
40+ if any ([(parameter in curriculum .get_config ().keys ())
41+ for parameter in used_reset_parameters ]):
4042 logger .warning ('Two or more curriculums will '
4143 'attempt to change the same reset '
4244 'parameter. The result will be '
@@ -69,18 +71,57 @@ def lesson_nums(self, lesson_nums):
6971 for brain_name , lesson in lesson_nums .items ():
7072 self .brains_to_curriculums [brain_name ].lesson_num = lesson
7173
72- def increment_lessons (self , progresses ):
73- """Increments all the lessons of all the curriculums in this MetaCurriculum.
74+ def _lesson_ready_to_increment (self , brain_name , reward_buff_size ):
75+ """Determines whether the curriculum of a specified brain is ready
76+ to attempt an increment.
7477
7578 Args:
76- progresses (dict): A dict of brain name to progress.
79+ brain_name (str): The name of the brain whose curriculum will be
80+ checked for readiness.
81+ reward_buff_size (int): The size of the reward buffer of the trainer
82+ that corresponds to the specified brain.
83+
84+ Returns:
85+ Whether the curriculum of the specified brain should attempt to
86+ increment its lesson.
87+ """
88+ return reward_buff_size >= (self .brains_to_curriculums [brain_name ]
89+ .min_lesson_length )
90+
91+ def increment_lessons (self , measure_vals , reward_buff_sizes = None ):
92+ """Attempts to increment all the lessons of all the curriculums in this
93+ MetaCurriculum. Note that calling this method does not guarantee the
94+ lesson of a curriculum will increment. The lesson of a curriculum will
95+ only increment if the specified measure threshold defined in the
96+ curriculum has been reached and the minimum number of episodes in the
97+ lesson have been completed.
98+
99+ Args:
100+ measure_vals (dict): A dict of brain name to measure value.
101+ reward_buff_sizes (dict): A dict of brain names to the size of their
102+ corresponding reward buffers.
103+
104+ Returns:
105+ A dict from brain name to whether that brain's lesson number was
106+ incremented.
77107 """
78- for brain_name , progress in progresses .items ():
79- self .brains_to_curriculums [brain_name ].increment_lesson (progress )
108+ ret = {}
109+ if reward_buff_sizes :
110+ for brain_name , buff_size in reward_buff_sizes .items ():
111+ if self ._lesson_ready_to_increment (brain_name , buff_size ):
112+ measure_val = measure_vals [brain_name ]
113+ ret [brain_name ] = (self .brains_to_curriculums [brain_name ]
114+ .increment_lesson (measure_val ))
115+ else :
116+ for brain_name , measure_val in measure_vals .items ():
117+ ret [brain_name ] = (self .brains_to_curriculums [brain_name ]
118+ .increment_lesson (measure_val ))
119+ return ret
80120
81121
82122 def set_all_curriculums_to_lesson_num (self , lesson_num ):
83- """Sets all the curriculums in this meta curriculum to a specified lesson number.
123+ """Sets all the curriculums in this meta curriculum to a specified
124+ lesson number.
84125
85126 Args:
86127 lesson_num (int): The lesson number which all the curriculums will
@@ -91,7 +132,8 @@ def set_all_curriculums_to_lesson_num(self, lesson_num):
91132
92133
93134 def get_config (self ):
94- """Get the combined configuration of all curriculums in this MetaCurriculum.
135+ """Get the combined configuration of all curriculums in this
136+ MetaCurriculum.
95137
96138 Returns:
97139 A dict from parameter to value.
0 commit comments