@@ -2828,9 +2828,10 @@ class SoftMax(TransferFunction):
<SoftMax.gain>` parametrically based on the `variable <SoftMax.variable>`:

- *mask_threshold* -- setting the **mask_threshold** argument to a scalar value causes the `variable
- <SoftMax.variable>` to be thresholded by that value before applying the SoftMax function; any elements of
- `variable <SoftMax.variable>` with an absolute value below the threshold are set to 0; all others are scaled
- by the specified `gain <SoftMax.gain>` and then passed through the SoftMax function. This only applies if the
+ <SoftMax.variable>` to be thresholded by that value before applying the SoftMax function: each element in
+ `variable <SoftMax.variable>` is first scaled by `gain <SoftMax.gain>`. Then, any elements with an absolute
+ value below *mask_threshold* are set to negative infinity (``-inf``), effectively masking them since
+ ``exp(-inf) = 0``. The remaining values are then passed through the SoftMax function. This only applies if the
**gain** argument is specified as a scalar; if it is specified as *ADAPTIVE*, then the **mask_threshold**
argument is ignored.
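For reference, a minimal NumPy sketch of the behavior described in the revised docstring above (the `masked_softmax` helper and the sample values are illustrative, not part of the library API):

```python
import numpy as np

def masked_softmax(x, gain=1.0, mask_threshold=None):
    # Illustrative helper: scale by gain first, then mask by absolute value.
    v = gain * np.asarray(x, dtype=float)
    if mask_threshold is not None:
        # Gain-scaled elements at or below the threshold become -inf,
        # so exp(-inf) == 0 removes them from the normalization.
        v = np.where(np.abs(v) > mask_threshold, v, -np.inf)
    if np.any(v != -np.inf):
        v = v - np.max(v)          # shift by the max for numerical stability
    e = np.exp(v)
    # If everything was masked, return all zeros instead of dividing by zero.
    return e if not np.any(e) else e / np.sum(e)

print(masked_softmax([0.2, 1.5, -2.0], gain=2.0, mask_threshold=1.0))
# The first entry (|2 * 0.2| = 0.4 <= 1.0) comes out exactly 0; the rest sum to 1.
```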
@@ -2920,10 +2921,11 @@ class SoftMax(TransferFunction):
mask_threshold : scalar or None
determines whether the `variable <SoftMax.variable>` is thresholded before applying the SoftMax function;
- if it is a scalar, only elements of `variable <SoftMax.variable>` with an absolute value greater than that
- value are considered when applying the SoftMax function (which are then scaled by the `gain <SoftMax.gain>`
- parameter; all other elements are assigned 0. This only applies if `gain <SoftMax.gain>` is specified as a
- scalar; otherwise it is ignored (see `Thresholding and Adaptive Gain <SoftMax_AdaptGain>` for details).
+ if it is a scalar, each element of `variable <SoftMax.variable>` is first scaled by `gain <SoftMax.gain>`. Then,
+ only elements with an absolute value greater than *mask_threshold* are considered when applying the SoftMax
+ function, while all other elements are set to ``-inf``, effectively masking them since ``exp(-inf) = 0``.
+ This only applies if `gain <SoftMax.gain>` is specified as a scalar; otherwise it is ignored
+ (see `Thresholding and Adaptive Gain <SoftMax_AdaptGain>` for details).
adapt_scale : scalar
determines the *scale* parameter used by the `adapt_gain <SoftMax.adapt_gain>` method (see method for details).
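As the revised `mask_threshold` entry above notes, thresholding is applied to the gain-scaled values, so whether an element is masked depends on `gain` as well as `mask_threshold`; a small illustrative check with arbitrary numbers:

```python
import numpy as np

x, mask_threshold = np.array([0.5, 3.0]), 1.0
for gain in (1.0, 4.0):
    kept = np.abs(gain * x) > mask_threshold   # the mask is computed on gain * x
    print(gain, kept)
# gain=1.0 -> [False  True]   (0.5 is masked)
# gain=4.0 -> [ True  True]   (4.0 * 0.5 = 2.0 clears the threshold)
```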
@@ -3149,22 +3151,31 @@ def _validate_variable(self, variable, context=None):
        return np.asarray(variable)

    def apply_softmax(self, input_value, gain, mask_threshold, output_type):
-
        # Modulate input_value by gain
        v = gain * input_value
-        # Shift by max to avoid extreme values:
-        v = v - np.max(v)
+
+        # Mask threshold
+        if mask_threshold is not None:
+            if np.any(v < 0):
+                warnings.warn(f"SoftMax function: mask_threshold is set "
+                              f"to {mask_threshold} but input_value contains negative values. "
+                              f"Masking will be applied to the magnitude of the input.")
+
+            v = np.where(np.abs(v) > mask_threshold, v, -np.inf)
+
+        # Make numerically stable by shifting by max value
+        if np.any(v != -np.inf):
+            v = v - np.max(v)
+
        # Exponentiate
        v = np.exp(v)
-        # Threshold if specified:
-        if mask_threshold:
-            v = v * np.where(input_value > mask_threshold, v, 0)
+
        # Normalize (to sum to 1)
-        if not any(v):
+        if not np.any(v):
            # If v is all zeros, avoid divide by zero in normalize and return all zeros for softmax
            sm = v
        else:
-            sm = v / np.sum(v, axis=0)
+            sm = v / np.sum(v)

        # Generate one-hot encoding based on selected output_type
        if output_type in {ARG_MAX, ARG_MAX_INDICATOR, MAX_VAL, MAX_INDICATOR}:
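A short standalone check of the all-masked edge case that the updated `apply_softmax` guards against above: every element becomes `-inf`, `exp` turns the vector to zeros, and the normalization is skipped, so the result is all zeros rather than a divide-by-zero (variable names and values are illustrative):

```python
import numpy as np

gain, mask_threshold = 1.0, 10.0
v = gain * np.array([0.1, -0.2, 0.3])
v = np.where(np.abs(v) > mask_threshold, v, -np.inf)   # everything is masked
if np.any(v != -np.inf):
    v = v - np.max(v)                                  # not reached here
v = np.exp(v)                                          # -> [0., 0., 0.]
sm = v if not np.any(v) else v / np.sum(v)
print(sm)                                              # [0. 0. 0.]
```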
@@ -3472,15 +3483,34 @@ def _gen_pytorch_fct(self, device, context=None):
        if isinstance(gain, str) and gain == ADAPTIVE:
            return lambda x: (torch.softmax(self._gen_pytorch_adapt_gain_fct(device, context)(x) * x, -1))

-        elif mask_threshold:
+        elif mask_threshold is not None:
            def pytorch_thresholded_softmax(_input: torch.Tensor) -> torch.Tensor:
-                # Mask elements of input below threshold
-                _mask = (torch.abs(_input) > mask_threshold)
-                # Subtract off the max value in the input to eliminate extreme values, exponentiate, and apply mask
-                masked_exp = _mask * torch.exp(gain * (_input - torch.max(_input, -1, keepdim=True)[0]))
-                if (masked_exp == 0).all():
-                    return masked_exp
-                return masked_exp / torch.sum(masked_exp, -1, keepdim=True)
+                v = gain * _input
+
+                # Apply threshold-based masking
+                if mask_threshold is not None:
+                    if torch.any(_input < 0):
+                        warnings.warn(f"Softmax function: mask_threshold is set to {mask_threshold}, "
+                                      f"but input contains negative values. "
+                                      f"Masking will be applied to the magnitude of the input.")
+
+                    # Create a mask where values below threshold are set to -inf
+                    mask = torch.abs(v) > mask_threshold
+                    v = v.masked_fill(~mask, float('-inf'))  # More stable than torch.where()
+
+                    # Handle case where all values are masked (return tensor with gradient support)
+                    if torch.all(~mask):
+                        return torch.full_like(v, 0.0, requires_grad=True)
+
+                    # Make numerically stable by shifting max value
+                    max_v = torch.max(v[mask])  # Avoid computing max over -inf
+                    v = v - max_v
+
+                # Compute softmax (PyTorch handles -inf correctly)
+                exp_v = torch.exp(v)
+                sm = exp_v / torch.sum(exp_v, dim=-1, keepdim=True)
+
+                return sm

            # Return the function
            return pytorch_thresholded_softmax
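A standalone sketch checking the PyTorch path added above: the masked element comes out as exactly 0, the remaining entries sum to 1, and gradients still flow through the unmasked values (tensor values are arbitrary, and the snippet inlines the same steps rather than calling the library):

```python
import torch

gain, mask_threshold = 2.0, 1.0
x = torch.tensor([0.2, 1.5, -2.0], requires_grad=True)

v = gain * x
mask = torch.abs(v) > mask_threshold
v = v.masked_fill(~mask, float('-inf'))          # masked entries -> -inf
v = v - torch.max(v[mask])                       # stable shift over unmasked values
sm = torch.exp(v) / torch.sum(torch.exp(v), dim=-1, keepdim=True)

print(sm)            # approximately tensor([0.0000, 0.9991, 0.0009])
sm[1].backward()     # gradient is zero for the masked element
print(x.grad)
```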