Skip to content
Snippets Groups Projects
Commit f4555bd5 authored by Karim Ahmed's avatar Karim Ahmed
Browse files

Improve log display

- Remove unneeded text
- Show only known errors
- Show warnings for COMPLETED jobs only
parent 6de29a7e
No related tags found
No related merge requests found
This commit is part of merge request !1088. Comments created here will be created in the context of that merge request.
...@@ -106,20 +106,6 @@ def slurm_job_status(jobid): ...@@ -106,20 +106,6 @@ def slurm_job_status(jobid):
return "NA", "NA", "NA" return "NA", "NA", "NA"
def parse_log_file(file_path):
    """Parse a JSON-lines log file into ``(message, class)`` tuples.

    Every non-blank line is decoded as one JSON object. Lines that cannot
    be decoded, or that decode to something other than a JSON object, are
    reported through ``log.error`` and skipped, so one bad line never
    aborts the whole file.

    Args:
        file_path: Path of the log file to read.

    Returns:
        list: One ``(message, class)`` tuple per valid entry, in file
        order. A key missing from an entry yields an empty string.
    """
    results = []
    with open(file_path, 'r') as file:
        for line in file:
            stripped = line.strip()
            if not stripped:
                # Blank lines (e.g. a trailing newline) carry no entry;
                # skip them quietly instead of logging a bogus error.
                continue
            try:
                log_entry = json.loads(stripped)
            except json.JSONDecodeError:
                log.error(f"Skipping invalid JSON: {stripped}")
                continue
            if not isinstance(log_entry, dict):
                # Valid JSON such as `123` or `[...]` has no `.get`;
                # without this guard it would raise AttributeError.
                log.error(f"Skipping non-object JSON entry: {stripped}")
                continue
            results.append(
                (log_entry.get('message', ''), log_entry.get('class', '')))
    return results
def get_report_dir(command): def get_report_dir(command):
args = shlex.split(command) args = shlex.split(command)
try: try:
...@@ -129,56 +115,30 @@ def get_report_dir(command): ...@@ -129,56 +115,30 @@ def get_report_dir(command):
return return
def process_log_file(job_id, karabo_id, report_dir, karabo_id_log, file): def process_log_file(job_id,karabo_id_log, file):
if file.exists(): with open(file, 'r') as f:
with open(file, 'r') as f: for line in f:
for line in f: try:
try: json_line = json.loads(line)
json_line = json.loads(line) message = json_line['level'] + json_line['message']
if "default" not in json_line['class'].lower(): log_class = json_line['log_class'].strip(".")
message = json_line['message'] if message:
if message: if not any(["calibration" in c.lower() for c in log_class]):
karabo_id_log.setdefault( message = "Error: Uncaught error!"
message, []).append(job_id) karabo_id_log.setdefault(
except json.JSONDecodeError: message, []).append(job_id)
log.error( except json.JSONDecodeError:
f"Invalid JSON in errors file {file}:" log.error(
f" {line.strip()}") f"Invalid JSON in errors file {file}:"
return karabo_id_log f" {line.strip()}")
def compress_job_ids(job_ids):
    """Compress list of job IDs to a shorter representation.

    Duplicate IDs are collapsed first, so the reported count is the number
    of distinct jobs and a repeated ID can never be mistaken for part of a
    contiguous range.

    Args:
        job_ids (list): List of job IDs (ints or numeric strings).

    Returns:
        str: Compressed representation:
            - "0 jobs" for an empty input,
            - "16 jobs (11498126-11498141)" for 3 or more consecutive IDs,
            - "7 jobs (e.g. 11498126, 11498130...)" for 5 or more
              non-consecutive IDs,
            - "2 jobs (11498142, 11498150)" otherwise (all IDs listed).

    Raises:
        ValueError: If a job ID cannot be converted to ``int``.
    """
    if not job_ids:
        return "0 jobs"

    # Deduplicate, convert to integers and sort. The same job can be
    # recorded more than once (e.g. one entry per matching log line).
    ids = sorted({int(job_id) for job_id in job_ids})
    count = len(ids)
    first, last = ids[0], ids[-1]

    # With unique sorted IDs, span == count means there are no gaps.
    if count > 2 and last - first + 1 == count:
        return f"{count} jobs ({first}-{last})"

    if count > 4:
        return f"{count} jobs (e.g. {ids[0]}, {ids[1]}...)"

    return f"{count} jobs ({', '.join(map(str, ids))})"
def format_log_message(errors): def format_log_message(logs):
"""Format log messages with compressed job IDs.""" """Format log messages with compressed job IDs."""
formatted = {} formatted = {}
for karabo_id, messages in errors.items(): for karabo_id, messages in logs.items():
formatted[karabo_id] = { formatted[karabo_id] = {
msg: compress_job_ids(job_ids) msg: f" {len(job_ids)} jobs"
for msg, job_ids in messages.items() for msg, job_ids in messages.items()
} }
return formatted return formatted
...@@ -390,27 +350,27 @@ class JobsMonitor: ...@@ -390,27 +350,27 @@ class JobsMonitor:
for job_id, status in job_statuses: for job_id, status in job_statuses:
if status == "COMPLETED": if status == "COMPLETED":
continue # No need to process warnings for failed jobs.
warning_file = report_dir / f"warnings_{job_id}.log"
if warning_file.exists():
karabo_id_warn = warnings.setdefault(karabo_id, {})
process_log_file(job_id, karabo_id_warn, warning_file)
continue # No errors expected for COMPLETED jobs.
if not exec_success: # no errors expected for successful execution. if not exec_success: # no errors expected for successful execution.
karabo_id_err = errors.setdefault(karabo_id, {}) karabo_id_err = errors.setdefault(karabo_id, {})
if status == "FAILED": # process error logs if status == "FAILED": # process error logs
error_file = report_dir / f"errors_{job_id}.log" error_file = report_dir / f"errors_{job_id}.log"
process_log_file( if error_file.exists():
job_id, karabo_id, report_dir, karabo_id_err, error_file) process_log_file(job_id, karabo_id_err, error_file)
if len(karabo_id_err) == 0: if len(karabo_id_err) == 0:
log.warning(f"Job {job_id} failed but no error log/messages found.") log.warning(f"Job {job_id} failed but no error log/messages found.")
karabo_id_err.setdefault( karabo_id_err.setdefault(
"Job failed but no error logs found", []).append(job_id) "Error: Job failed but no error logs found", []).append(job_id)
else: # Job unsucessefull with a status other than `FAILED` else: # Job unsucessefull with a status other than `FAILED`
karabo_id_err.setdefault( karabo_id_err.setdefault(
f"SLURM job terminated with status: {status}", []).append(job_id) f"Error: SLURM job terminated with status: {status}", []).append(job_id)
# Process warning logs
warning_file = report_dir / f"warnings_{job_id}.log"
process_log_file(
job_id, karabo_id, report_dir, karabo_id_err, warning_file)
success = (not krb_ids_failed) success = (not krb_ids_failed)
...@@ -440,11 +400,11 @@ class JobsMonitor: ...@@ -440,11 +400,11 @@ class JobsMonitor:
msg = "Calibration jobs succeeded" msg = "Calibration jobs succeeded"
else: else:
# List success & failure by karabo_id # List success & failure by karabo_id
ok = ', '.join(sorted(krb_ids_success)) if krb_ids_success else 'none' ok = ', '.join(sorted(krb_ids_success)) if krb_ids_success else None
msg = ( msg = ""
f"Succeeded: {ok}; Failed: {', '.join(sorted(krb_ids_failed))} :" if ok:
f" {json.dumps(format_log_message(errors), indent=4)}" msg += f"Succeeded: {ok}; "
) msg += f"Failed: {json.dumps(format_log_message(errors), indent=4)}"
log.debug("Update MDC for %s, %s: %s", r['action'], r['mymdc_id'], msg) log.debug("Update MDC for %s, %s: %s", r['action'], r['mymdc_id'], msg)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment