From 614e6c211e62f3880fd8c44b273a1ee5a57bd0ee Mon Sep 17 00:00:00 2001 From: Al Rigazzi Date: Mon, 25 Oct 2021 11:20:05 -0500 Subject: [PATCH 1/3] Change env var separator because of Slurm bug --- smartsim/control/controller.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/smartsim/control/controller.py b/smartsim/control/controller.py index 53a012668..f4e620505 100644 --- a/smartsim/control/controller.py +++ b/smartsim/control/controller.py @@ -416,12 +416,12 @@ def _prep_entity_client_env(self, entity): addresses = self._jobs.get_db_host_addresses() if addresses: if len(addresses) <= 128: - client_env["SSDB"] = ",".join(addresses) + client_env["SSDB"] = ";".join(addresses) else: # Cap max length of SSDB - client_env["SSDB"] = ",".join(addresses[:128]) + client_env["SSDB"] = ";".join(addresses[:128]) if entity.incoming_entities: - client_env["SSKEYIN"] = ",".join( + client_env["SSKEYIN"] = ";".join( [in_entity.name for in_entity in entity.incoming_entities] ) if entity.query_key_prefixing(): From ac1af4c46ce23a5e406b8e4c384f137be1aa1d92 Mon Sep 17 00:00:00 2001 From: Al Rigazzi Date: Tue, 26 Oct 2021 11:41:07 -0500 Subject: [PATCH 2/3] Add test and slurm comma-separated var handling --- smartsim/control/controller.py | 6 +-- smartsim/launcher/step/slurmStep.py | 6 ++- smartsim/settings/slurmSettings.py | 10 ++++- tests/on_wlm/test_launch_orc_slurm.py | 37 +++++++++++++++++++ .../test_configs/incoming_entities_reader.py | 25 +++++++++++++ 5 files changed, 78 insertions(+), 6 deletions(-) create mode 100644 tests/test_configs/incoming_entities_reader.py diff --git a/smartsim/control/controller.py b/smartsim/control/controller.py index 3a6b4c05b..78df428e9 100644 --- a/smartsim/control/controller.py +++ b/smartsim/control/controller.py @@ -421,12 +421,12 @@ def _prep_entity_client_env(self, entity): addresses = self._jobs.get_db_host_addresses() if addresses: if len(addresses) <= 128: - client_env["SSDB"] = ";".join(addresses) + client_env["SSDB"] = ",".join(addresses) else: # Cap max length of SSDB - client_env["SSDB"] = ";".join(addresses[:128]) + client_env["SSDB"] = ",".join(addresses[:128]) if entity.incoming_entities: - client_env["SSKEYIN"] = ";".join( + client_env["SSKEYIN"] = ",".join( [in_entity.name for in_entity in entity.incoming_entities] ) if entity.query_key_prefixing(): diff --git a/smartsim/launcher/step/slurmStep.py b/smartsim/launcher/step/slurmStep.py index 80e49384e..642a58ba2 100644 --- a/smartsim/launcher/step/slurmStep.py +++ b/smartsim/launcher/step/slurmStep.py @@ -130,13 +130,17 @@ def get_launch_cmd(self): srun_cmd = [srun, "--output", output, "--error", error, "--job-name", self.name] + if self.alloc: srun_cmd += ["--jobid", str(self.alloc)] if self.run_settings.env_vars: - env_var_str = self.run_settings.format_env_vars() + env_var_str, comma_separated_env_vars = self.run_settings.format_env_vars() srun_cmd += ["--export", env_var_str] + if comma_separated_env_vars: + srun_cmd = ["env"] + comma_separated_env_vars + srun_cmd + srun_cmd += self.run_settings.format_run_args() srun_cmd += self._build_exe() return srun_cmd diff --git a/smartsim/settings/slurmSettings.py b/smartsim/settings/slurmSettings.py index 9a7b036e8..a778c41e6 100644 --- a/smartsim/settings/slurmSettings.py +++ b/smartsim/settings/slurmSettings.py @@ -180,6 +180,8 @@ def format_env_vars(self): # TODO make these overridable by user presets = ["PATH", "LD_LIBRARY_PATH", "PYTHONPATH"] + comma_separated_format_str = [] + def add_env_var(var, format_str): try: value = os.environ[var] @@ -196,8 +198,12 @@ def add_env_var(var, format_str): # add user supplied variables for k, v in self.env_vars.items(): - format_str += "=".join((k, str(v))) + "," - return format_str.rstrip(",") + if "," in str(v): + comma_separated_format_str += ["=".join((k, str(v)))] + format_str += k + "," + else: + format_str += "=".join((k, str(v))) + "," + return format_str.rstrip(","), comma_separated_format_str class SbatchSettings(BatchSettings): diff --git a/tests/on_wlm/test_launch_orc_slurm.py b/tests/on_wlm/test_launch_orc_slurm.py index a501d4497..567c75129 100644 --- a/tests/on_wlm/test_launch_orc_slurm.py +++ b/tests/on_wlm/test_launch_orc_slurm.py @@ -64,3 +64,40 @@ def test_launch_slurm_cluster_orc(fileutils, wlmutils): exp.stop(orc) status = exp.get_status(orc) assert all([stat == constants.STATUS_CANCELLED for stat in status]) + + +def test_incoming_entities(fileutils, wlmutils): + """Mirroring of how SmartSim generates SSKEYIN""" + + exp_name = "test-incoming-entities" + exp = Experiment(exp_name, launcher=wlmutils.get_test_launcher()) + test_dir = fileutils.make_test_dir(exp_name) + + network_interface = wlmutils.get_test_interface() + orc = SlurmOrchestrator(6780, db_nodes=1, batch=False, interface=network_interface) + orc.set_path(test_dir) + + sleep = fileutils.get_test_conf_path("sleep.py") + sleep_settings = wlmutils.get_run_settings("python", f"{sleep} --time=3") + + sleep_ensemble = exp.create_ensemble("sleep-ensemble", + run_settings=sleep_settings, + replicas=2) + + sskeyin_reader = fileutils.get_test_conf_path("incoming_entities_reader.py") + sskeyin_reader_settings = wlmutils.get_run_settings("python", f"{sskeyin_reader}") + sskeyin_reader_settings.env_vars["NAME_0"] = sleep_ensemble.entities[0].name + sskeyin_reader_settings.env_vars["NAME_1"] = sleep_ensemble.entities[1].name + sskeyin_reader = exp.create_model("sskeyin_reader", path=test_dir, run_settings=sskeyin_reader_settings) + sskeyin_reader.register_incoming_entity(sleep_ensemble.entities[0]) + sskeyin_reader.register_incoming_entity(sleep_ensemble.entities[1]) + + exp.start(orc, block=False) + try: + exp.start(sskeyin_reader, block=True) + assert exp.get_status(sskeyin_reader)[0] == constants.STATUS_COMPLETED + except Exception as e: + exp.stop(orc) + raise e + + exp.stop(orc) diff --git a/tests/test_configs/incoming_entities_reader.py b/tests/test_configs/incoming_entities_reader.py new file mode 100644 index 000000000..674f9fa7a --- /dev/null +++ b/tests/test_configs/incoming_entities_reader.py @@ -0,0 +1,25 @@ +from os import environ + +"""This script checks that the names of multiple + incoming entities is correctly passed through + a WLM. + SSKEYIN is the environment variable passed by + SmartSim and it should contain two names: + sleep_0 and sleep_1. For this test, we + also pass them in as separate variables + NAME_0 and NAME_1. This program will fail if + SSKEYIN does not contain NAME_0 and NAME_1 in. + the right order. +""" + +sskeyin = environ["SSKEYIN"] +name_0 = environ["NAME_0"] +name_1 = environ["NAME_1"] + +smartsim_separator = "," # this is the current choice + +expected_sskeyin = smartsim_separator.join((name_0, name_1)) + +if sskeyin != expected_sskeyin: + raise ValueError(f"SSKEYIN expected to be {expected_sskeyin}, " + f"but was {sskeyin}") \ No newline at end of file From e1e776f54edb7929ad3dde8418e10f12250896ca Mon Sep 17 00:00:00 2001 From: Al Rigazzi Date: Tue, 26 Oct 2021 12:38:50 -0500 Subject: [PATCH 3/3] Update Slurm tests for new formatting of env var --- tests/test_slurm_settings.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tests/test_slurm_settings.py b/tests/test_slurm_settings.py index 11e011908..5caf2f57e 100644 --- a/tests/test_slurm_settings.py +++ b/tests/test_slurm_settings.py @@ -46,11 +46,13 @@ def test_update_env(): def test_format_env(): - env_vars = {"OMP_NUM_THREADS": 20, "LOGGING": "verbose"} + env_vars = {"OMP_NUM_THREADS": 20, "LOGGING": "verbose", "SSKEYIN": "name_0,name_1"} settings = SrunSettings("python", env_vars=env_vars) - formatted = settings.format_env_vars() + formatted, comma_separated_formatted = settings.format_env_vars() assert "OMP_NUM_THREADS" in formatted assert "LOGGING" in formatted + assert "SSKEYIN" in formatted + assert "SSKEYIN=name_0,name_1" in comma_separated_formatted # ---- Sbatch ---------------------------------------------------