From 9040acd1fc89d13e581b3d33f8fb3eee6057ce11 Mon Sep 17 00:00:00 2001
From: Robert Rosca <robert.rosca@xfel.eu>
Date: Fri, 13 May 2022 14:49:57 +0200
Subject: [PATCH] Improve xfel_calibrate process error logging

---
 webservice/webservice.py | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/webservice/webservice.py b/webservice/webservice.py
index f15aefbae..3f9963729 100644
--- a/webservice/webservice.py
+++ b/webservice/webservice.py
@@ -238,14 +238,16 @@ def change_config(config, updated_config, instrument,
     return yaml.safe_dump(new_conf, default_flow_style=False).encode()
 
 
-async def run_proc_async(cmd: List[str]) -> Tuple[int, bytes]:
+async def run_proc_async(cmd: List[str]) -> Tuple[Optional[int], bytes, bytes]:
     """Run a subprocess to completion using asyncio, capturing stdout
 
     Returns the numeric exit code and stdout (bytes)
     """
-    proc = await asyncio.create_subprocess_exec(*cmd, stdout=asyncio.subprocess.PIPE)
-    stdout, _ = await proc.communicate()
-    return proc.returncode, stdout
+    proc = await asyncio.create_subprocess_exec(
+        *cmd, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE
+    )
+    stdout, stderr = await proc.communicate()
+    return proc.returncode, stdout, stderr
 
 
 def slurm_status(filter_user=True):
@@ -479,9 +481,10 @@ async def run_action(job_db, cmd, mode, proposal, run, rid) -> str:
     """
     if mode == "prod":
         logging.info(" ".join(cmd))
-        retcode, stdout = await run_proc_async(cmd)
+        retcode, stdout, stderr = await run_proc_async(cmd)
         if retcode != 0:
             logging.error(Errors.JOB_LAUNCH_FAILED.format(cmd, retcode))
+            logging.error(f"{stdout=}, {stderr=}")
             return Errors.JOB_LAUNCH_FAILED.format(cmd, retcode)
 
         if "DARK" in cmd:
@@ -741,12 +744,13 @@ async def get_slurm_nice(partition: str, instrument: str,
         return 0  # Don't apply degressive priority on exfel.
 
     # List all names for jobs running in the specified partition.
-    returncode, job_names = await run_proc_async(
+    returncode, job_names, stderr = await run_proc_async(
         ['squeue', '-h', '-o', '%.20j', '-p', partition, '--me'])
 
     if returncode != 0:
         logging.error(f'Non-zero return code {returncode} from '
                       f'`squeue` upon counting number of jobs')
+        logging.warning(f"{stderr=}")
         return 0  # Fallback if something went wrong.
 
     # Base value depending on proposal type using cycle, assuming that
-- 
GitLab