Skip to content
Snippets Groups Projects
Commit ec929769 authored by Mikhail Karnevskiy
Browse files

Merge branch 'fix/slurn_give_none' into 'master'

Fix: Add check for slurm exceptions

See merge request detectors/pycalibration!268
parents ca5fbc6c b7c5f3b1
No related branches found
No related tags found
1 merge request: !268 Fix: Add check for slurm exceptions
...@@ -288,54 +288,64 @@ async def update_job_db(config): ...@@ -288,54 +288,64 @@ async def update_job_db(config):
logging.info("Starting config db handling") logging.info("Starting config db handling")
conn = await init_job_db(config) conn = await init_job_db(config)
mdc = await init_md_client(config) mdc = await init_md_client(config)
time_interval = int(config['web-service']['job-update-interval'])
while True: while True:
statii = await slurm_status() statii = await slurm_status()
c = conn.cursor() # Check that slurm is giving proper feedback
c.execute("SELECT * FROM jobs WHERE status IN ('R', 'PD', 'CG') ") if statii is None:
combined = {} await asyncio.sleep(time_interval)
logging.debug("SLURM info {}".format(statii)) continue
try:
for r in c.fetchall(): c = conn.cursor()
rid, jobid, proposal, run, status, time, _, _ = r c.execute("SELECT * FROM jobs WHERE status IN ('R', 'PD', 'CG') ")
logging.debug("DB info {}".format(r)) combined = {}
logging.debug("SLURM info {}".format(statii))
cflg, cstatus = combined.get(rid, ([], []))
if jobid in statii: for r in c.fetchall():
slstatus, runtime = statii[jobid] rid, jobid, proposal, run, status, time, _, _ = r
query = "UPDATE jobs SET status='{status}', time='{runtime}' WHERE jobid LIKE '{jobid}'" # noqa logging.debug("DB info {}".format(r))
c.execute(query.format(status=slstatus,
runtime=runtime, cflg, cstatus = combined.get(rid, ([], []))
jobid=jobid)) if jobid in statii:
slstatus, runtime = statii[jobid]
cflg.append('R') query = "UPDATE jobs SET status='{status}', time='{runtime}' WHERE jobid LIKE '{jobid}'" # noqa
cstatus.append("{}-{}".format(slstatus, runtime)) c.execute(query.format(status=slstatus,
else: runtime=runtime,
_, sltime, slstatus = await slurm_job_status(jobid) jobid=jobid))
query = "UPDATE jobs SET status='{slstatus}' WHERE jobid LIKE '{jobid}'" # noqa
c.execute(query.format(jobid=jobid, slstatus=slstatus)) cflg.append('R')
cstatus.append("{}-{}".format(slstatus, runtime))
if slstatus == 'COMPLETED':
cflg.append("A")
else: else:
cflg.append("NA") _, sltime, slstatus = await slurm_job_status(jobid)
cstatus.append(slstatus) query = "UPDATE jobs SET status='{slstatus}' WHERE jobid LIKE '{jobid}'" # noqa
combined[rid] = cflg, cstatus c.execute(query.format(jobid=jobid, slstatus=slstatus))
conn.commit()
if slstatus == 'COMPLETED':
flg_order = {"R": 2, "A": 1, "NA": 0} cflg.append("A")
for rid, value in combined.items(): else:
if int(rid) == 0: cflg.append("NA")
continue cstatus.append(slstatus)
flgs, statii = value combined[rid] = cflg, cstatus
flg = max(flgs, key=lambda i: flg_order[i]) conn.commit()
msg = "\n".join(statii)
logging.debug("Update MDC {}, {}".format(rid, flg_order = {"R": 2, "A": 1, "NA": 0}
msg.replace('\n', ', '))) for rid, value in combined.items():
response = mdc.update_run_api(rid, {'flg_cal_data_status': flg, if int(rid) == 0:
'cal_pipeline_reply': msg}) continue
if response.status_code != 200: flgs, statii = value
logging.error(Errors.MDC_RESPONSE.format(response)) flg = max(flgs, key=lambda i: flg_order[i])
await asyncio.sleep(int(config['web-service']['job-update-interval'])) msg = "\n".join(statii)
msg_debug = f"Update MDC {rid}, {msg}"
logging.debug(msg_debug.replace('\n', ', '))
response = mdc.update_run_api(rid, {'flg_cal_data_status': flg,
'cal_pipeline_reply': msg})
if response.status_code != 200:
logging.error(Errors.MDC_RESPONSE.format(response))
except Exception as e:
e = str(e)
logging.error(f"Failure to update job DB: {e}")
await asyncio.sleep(time_interval)
async def copy_untouched_files(file_list, out_folder, run): async def copy_untouched_files(file_list, out_folder, run):
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment