diff --git a/src/xfel_calibrate/calibrate.py b/src/xfel_calibrate/calibrate.py
index 004193424919dc4339e2fe425a41750b67a3f190..e0e43e1f71d29598e73febd050b4e1c5f16cbccf 100755
--- a/src/xfel_calibrate/calibrate.py
+++ b/src/xfel_calibrate/calibrate.py
@@ -89,7 +89,12 @@ def make_initial_parser(**kwargs):
 
     parser.add_argument_group('required arguments')
 
-    parser.add_argument('--reservation', type=str, default="")
+    parser.add_argument('--slurm-partition', type=str, default="",
+                        help="Submit jobs in this Slurm partition")
+
+    parser.add_argument('--reservation', type=str, default="",
+                        help="Submit jobs in this Slurm reservation, "
+                             "overriding --slurm-partition if both are set")
 
     return parser
 
@@ -573,7 +578,34 @@ def save_executed_command(run_tmp_path, version):
         finfile.write(' '.join(sys.argv))
 
 
-def get_launcher_command(args, temp_path, dep_jids):
+def get_slurm_partition_or_reservation(args) -> List[str]:
+    """Return sbatch arguments to use a partition or reservation
+
+    --reservation and --slurm-partition options have precedence.
+    Otherwise, if --priority is <=1, it will use a configured reservation
+    depending on how many nodes are currently free.
+    """
+    ureservation = args['reservation']
+    upartition = args['slurm_partition']
+    priority = args['priority']
+    relevant_resv = reservation_char if priority <= 0 else reservation
+
+    if ureservation:
+        return ['--reservation', ureservation]
+    elif upartition:
+        return ['--partition', upartition]
+    elif (priority <= 1) and relevant_resv:
+        # Use a reservation if there aren't many general nodes available to us
+        free = int(check_output(free_nodes_cmd, shell=True).decode('utf8'))
+        preempt = int(check_output(preempt_nodes_cmd, shell=True).decode('utf8'))
+        if free + preempt < max_reserved:
+            return ['--reservation', relevant_resv]
+
+    # Fallback to using the configured partition (default: exfel)
+    return ['--partition', sprof]
+
+
+def get_launcher_command(args, temp_path, dep_jids) -> List[str]:
     """
     Return a slurm launcher command
     :param args: Command line arguments
@@ -582,40 +614,24 @@ def get_launcher_command(args, temp_path, dep_jids):
     :return: List of commands and parameters to be used by subprocess
     """
 
-    launcher_slurm = launcher_command.format(temp_path=temp_path)
+    launcher_slurm = launcher_command.format(temp_path=temp_path).split()
 
-    # calculate number of general nodes available
-    free = int(check_output(free_nodes_cmd, shell=True).decode('utf8'))
-    preempt = int(check_output(preempt_nodes_cmd, shell=True).decode('utf8'))
-    ureservation = args['reservation']
-    priority = args['priority']
-
-    if (ureservation == "" and
-            (free + preempt >= max_reserved or
-             priority > 1 or
-             reservation == "")):
-        launcher_slurm += " --partition {}".format(sprof)
-    else:
-        this_res = reservation if priority == 1 else reservation_char
-        if ureservation != "":
-            this_res = ureservation
-        launcher_slurm += " --reservation={}".format(this_res)
+    launcher_slurm += get_slurm_partition_or_reservation(args)
 
     job_name = args.get('slurm_name', 'xfel_calibrate')
-    launcher_slurm += " --job-name {}".format(job_name)
+    launcher_slurm += ["--job-name", job_name]
 
     if args.get('slurm_priority'):
-        launcher_slurm += " --nice={}".format(args.get('slurm_priority'))
+        launcher_slurm.append("--nice={}".format(args.get('slurm_priority')))
 
-    launcher_slurm += " --mem {}G".format(args.get('slurm_mem', '500'))
+    launcher_slurm.append("--mem={}G".format(args.get('slurm_mem', '500')))
 
     if len(dep_jids):
-        srun_dep = " --dependency=afterok"
-        for jobid in dep_jids:
-            srun_dep += ":{}".format(jobid)
-        launcher_slurm += srun_dep
+        launcher_slurm.append(
+            "--dependency=afterok:" + ":".join(str(j) for j in dep_jids)
+        )
 
-    return launcher_slurm.split()
+    return launcher_slurm
 
 
 def remove_duplications(l):