Fix PyTorch stateful RNN/LSTM gradient computation error (#20916), resolves #20875

* Fix PyTorch stateful RNN gradient computation error

* Updates post feedback
praveenhosdrug123 authored Mar 3, 2025
1 parent 7a7bca6 commit f7115c2
Showing 1 changed file with 6 additions and 0 deletions: keras/src/layers/rnn/rnn.py
```diff
@@ -331,6 +331,12 @@ def inner_loop(self, sequences, initial_state, mask, training=False):
             cell_kwargs["training"] = training

         def step(inputs, states):
+            # Create new tensor copies when using the PyTorch backend
+            # with stateful=True. This prevents in-place modifications
+            # that would otherwise break PyTorch's autograd functionality
+            # by modifying tensors needed for gradient computation.
+            if backend.backend() == "torch" and self.stateful:
+                states = tree.map_structure(ops.copy, states)
             output, new_states = self.cell(inputs, states, **cell_kwargs)
             if not tree.is_nested(new_states):
                 new_states = [new_states]
```
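For context, here is a minimal standalone PyTorch sketch (not the Keras code itself) of the failure mode this commit guards against. When a tensor that autograd saved for the backward pass is later modified in place, `backward()` raises a `RuntimeError`; cloning the tensor first, analogous to `tree.map_structure(ops.copy, states)` in the patch, keeps the saved value intact. The variable names below are illustrative only.

```python
import torch

w = torch.ones(3, requires_grad=True)

# exp() saves its output for the backward pass, so an in-place update
# to that output clobbers a tensor autograd still needs.
state = torch.exp(w)
state += 1.0  # in-place update, as a stateful cell might perform
try:
    state.sum().backward()
except RuntimeError as err:
    print("in-place update broke autograd:", type(err).__name__)

# Copying the state first (what the fix does per state tensor via
# ops.copy) leaves the saved output untouched, so backward succeeds.
w2 = torch.ones(3, requires_grad=True)
fresh = torch.exp(w2).clone()
fresh += 1.0
fresh.sum().backward()
print("gradient after copy:", w2.grad)  # exp(1) in each position
```

The fix applies `ops.copy` only when the backend is `"torch"` and the layer is stateful, since that is the combination where Keras carries state tensors across calls and PyTorch's autograd is tracking them.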
