From 9040acd1fc89d13e581b3d33f8fb3eee6057ce11 Mon Sep 17 00:00:00 2001
From: Robert Rosca <robert.rosca@xfel.eu>
Date: Fri, 13 May 2022 14:49:57 +0200
Subject: [PATCH] Improve xfel_calibrate process error logging

---
 webservice/webservice.py | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/webservice/webservice.py b/webservice/webservice.py
index f15aefbae..3f9963729 100644
--- a/webservice/webservice.py
+++ b/webservice/webservice.py
@@ -238,14 +238,16 @@ def change_config(config, updated_config, instrument,
     return yaml.safe_dump(new_conf, default_flow_style=False).encode()
 
 
-async def run_proc_async(cmd: List[str]) -> Tuple[int, bytes]:
+async def run_proc_async(cmd: List[str]) -> Tuple[Optional[int], bytes, bytes]:
     """Run a subprocess to completion using asyncio, capturing stdout
 
     Returns the numeric exit code and stdout (bytes)
     """
-    proc = await asyncio.create_subprocess_exec(*cmd, stdout=asyncio.subprocess.PIPE)
-    stdout, _ = await proc.communicate()
-    return proc.returncode, stdout
+    proc = await asyncio.create_subprocess_exec(
+        *cmd, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE
+    )
+    stdout, stderr = await proc.communicate()
+    return proc.returncode, stdout, stderr
 
 
 def slurm_status(filter_user=True):
@@ -479,9 +481,10 @@ async def run_action(job_db, cmd, mode, proposal, run, rid) -> str:
     """
     if mode == "prod":
         logging.info(" ".join(cmd))
-        retcode, stdout = await run_proc_async(cmd)
+        retcode, stdout, stderr = await run_proc_async(cmd)
         if retcode != 0:
             logging.error(Errors.JOB_LAUNCH_FAILED.format(cmd, retcode))
+            logging.error(f"{stdout=}, {stderr=}")
             return Errors.JOB_LAUNCH_FAILED.format(cmd, retcode)
 
         if "DARK" in cmd:
@@ -741,12 +744,13 @@ async def get_slurm_nice(partition: str, instrument: str,
         return 0  # Don't apply degressive priority on exfel.
 
     # List all names for jobs running in the specified partition.
-    returncode, job_names = await run_proc_async(
+    returncode, job_names, stderr = await run_proc_async(
         ['squeue', '-h', '-o', '%.20j', '-p', partition, '--me'])
 
     if returncode != 0:
         logging.error(f'Non-zero return code {returncode} from '
                       f'`squeue` upon counting number of jobs')
+        logging.warning(f"{stderr=}")
         return 0  # Fallback if something went wrong.
 
     # Base value depending on proposal type using cycle, assuming that
-- 
GitLab