Factor get_initial_loop_state into get_initial_feedables, get_initial_histories and get_initial_constants

varisd · jlibovicky · commit 89eb9c238a69 · 2019-03-26T10:36:14.000+01:00
diff --git a/neuralmonkey/decoders/autoregressive.py b/neuralmonkey/decoders/autoregressive.py
@@ -378,8 +378,14 @@ def runtime_logprobs(self) -> tf.Tensor:
     def output_dimension(self) -> int:
         raise NotImplementedError("Abstract property")
 
-    def get_initial_loop_state(self) -> LoopState:
+    def get_initial_feedables(self) -> DecoderFeedables:
+        return DecoderFeedables(
+            step=tf.constant(0, tf.int32),
+            finished=tf.zeros([self.batch_size], dtype=tf.bool),
+            embedded_input=self.embed_input_symbols(self.go_symbols),
+            other=None)
 
+    def get_initial_histories(self) -> DecoderHistories:
         output_states = tf.zeros(
             shape=[0, self.batch_size, self.embedding_size],
             dtype=tf.float32,
@@ -400,25 +406,21 @@ def get_initial_loop_state(self) -> LoopState:
             dtype=tf.float32,
             name="hist_logits")
 
-        feedables = DecoderFeedables(
-            step=tf.constant(0, tf.int32),
-            finished=tf.zeros([self.batch_size], dtype=tf.bool),
-            embedded_input=self.embed_input_symbols(self.go_symbols),
-            other=None)
-
-        histories = DecoderHistories(
+        return DecoderHistories(
             logits=logits,
             output_states=output_states,
             output_mask=output_mask,
             output_symbols=output_symbols,
             other=None)
 
-        constants = DecoderConstants(train_inputs=self.train_inputs)
+    def get_initial_constants(self) -> DecoderConstants:
+        return DecoderConstants(train_inputs=self.train_inputs)
 
+    def get_initial_loop_state(self) -> LoopState:
         return LoopState(
-            histories=histories,
-            constants=constants,
-            feedables=feedables)
+            feedables=self.get_initial_feedables(),
+            histories=self.get_initial_histories(),
+            constants=self.get_initial_constants())
 
     def loop_continue_criterion(self, *args) -> tf.Tensor:
         """Decide whether to break out of the while loop.
diff --git a/neuralmonkey/decoders/decoder.py b/neuralmonkey/decoders/decoder.py
@@ -4,7 +4,7 @@
 from typeguard import check_argument_types
 
 from neuralmonkey.decoders.autoregressive import (
-    AutoregressiveDecoder, LoopState)
+    AutoregressiveDecoder, DecoderFeedables, DecoderHistories, LoopState)
 from neuralmonkey.attention.base_attention import BaseAttention
 from neuralmonkey.vocabulary import Vocabulary
 from neuralmonkey.model.sequence import EmbeddedSequence
@@ -357,17 +357,20 @@ def next_state(self, loop_state: LoopState) -> Tuple[tf.Tensor, Any, Any]:
 
         return (output, new_feedables, new_histories)
 
-    def get_initial_loop_state(self) -> LoopState:
-        default_ls = AutoregressiveDecoder.get_initial_loop_state(self)
-        feedables = default_ls.feedables
-        histories = default_ls.histories
+    def get_initial_feedables(self) -> DecoderFeedables:
+        feedables = AutoregressiveDecoder.get_initial_feedables(self)
 
         rnn_feedables = RNNFeedables(
             prev_contexts=[tf.zeros([self.batch_size, a.context_vector_size])
                            for a in self.attentions],
             prev_rnn_state=self.initial_state,
             prev_rnn_output=self.initial_state)
 
+        return feedables._replace(other=rnn_feedables)
+
+    def get_initial_histories(self) -> DecoderHistories:
+        histories = AutoregressiveDecoder.get_initial_histories(self)
+
         rnn_histories = RNNHistories(
             rnn_outputs=tf.zeros(
                 shape=[0, self.batch_size, self.rnn_size],
@@ -376,10 +379,7 @@ def get_initial_loop_state(self) -> LoopState:
             attention_histories=[a.initial_loop_state()
                                  for a in self.attentions if a is not None])
 
-        return LoopState(
-            histories=histories._replace(other=rnn_histories),
-            constants=default_ls.constants,
-            feedables=feedables._replace(other=rnn_feedables))
+        return histories._replace(other=rnn_histories)
 
     def finalize_loop(self, final_loop_state: LoopState,
                       train_mode: bool) -> None:
diff --git a/neuralmonkey/decoders/transformer.py b/neuralmonkey/decoders/transformer.py
@@ -452,10 +452,8 @@ def train_loop_result(self) -> LoopState:
             histories=histories,
             constants=decoder_ls.constants)
 
-    def get_initial_loop_state(self) -> LoopState:
-        default_ls = AutoregressiveDecoder.get_initial_loop_state(self)
-        feedables = default_ls.feedables
-        histories = default_ls.histories
+    def get_initial_feedables(self) -> DecoderFeedables:
+        feedables = AutoregressiveDecoder.get_initial_feedables(self)
 
         tr_feedables = TransformerFeedables(
             input_sequence=tf.zeros(
@@ -467,6 +465,11 @@ def get_initial_loop_state(self) -> LoopState:
                 dtype=tf.float32,
                 name="input_mask"))
 
+        return feedables._replace(other=tr_feedables)
+
+    def get_initial_histories(self) -> DecoderHistories:
+        histories = AutoregressiveDecoder.get_initial_histories(self)
+
         # TODO: record histories properly
         tr_histories = tf.zeros([])
         # tr_histories = TransformerHistories(
@@ -479,10 +482,7 @@ def get_initial_loop_state(self) -> LoopState:
         #                                    self.n_heads_enc)
         #        for a in range(self.depth)])
 
-        return LoopState(
-            histories=histories._replace(other=tr_histories),
-            constants=default_ls.constants,
-            feedables=feedables._replace(other=tr_feedables))
+        return histories._replace(other=tr_histories)
 
     def next_state(self, loop_state: LoopState) -> Tuple[tf.Tensor, Any, Any]:
         feedables = loop_state.feedables