From a2492532048bac267d54be8f814450c41d7e1e5a Mon Sep 17 00:00:00 2001
From: ahmedk <karim.ahmed@xfel.eu>
Date: Tue, 24 Sep 2024 13:31:59 +0200
Subject: [PATCH] feat: store logs in JSON format and split in different files
 for job_id

---
 bin/slurm_calibrate.sh              |   1 +
 notebooks/test/test-logging.ipynb   |  24 ++++--
 setup.py                            |   1 +
 src/xfel_calibrate/setup_logging.py | 114 ++++++++++++++++++++--------
 4 files changed, 102 insertions(+), 38 deletions(-)

diff --git a/bin/slurm_calibrate.sh b/bin/slurm_calibrate.sh
index a93b5d02a..c3c1d88ad 100755
--- a/bin/slurm_calibrate.sh
+++ b/bin/slurm_calibrate.sh
@@ -26,6 +26,7 @@ echo "hostname: $(hostname)"
 
 export CAL_NOTEBOOK_NAME="$notebook"
 export CAL_WORKING_DIR="$(pwd)"
+export SLURM_JOB_ID="${SLURM_JOB_ID:-local}"
 
 # This is created by calibrate.py in CAL_WORKING_DIR
 setup_logger="setup-logging-nb.py"
diff --git a/notebooks/test/test-logging.ipynb b/notebooks/test/test-logging.ipynb
index 4ad97550f..dc4a7b25d 100644
--- a/notebooks/test/test-logging.ipynb
+++ b/notebooks/test/test-logging.ipynb
@@ -18,7 +18,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "from logging import debug, error, info, warning"
+    "import logging"
    ]
   },
   {
@@ -36,8 +36,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "debug(\"(DEBUG)ing for some logs\")\n",
-    "info(\"Logging some (INFO)rmation\")"
+    "logging.info(\"Logging some (INFO)rmation\")"
    ]
   },
   {
@@ -55,9 +54,20 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "from warnings import warn\n",
+    "import warnings\n",
+    "\n",
+    "\n",
+    "class TestUserWarning(UserWarning):\n",
+    "    \"\"\"Base class for custom user warnings\"\"\"\n",
+    "    pass\n",
+    "\n",
+    "\n",
+    "def warn_user(message, warning_class):\n",
+    "    warnings.warn(message, warning_class)\n",
+    "    \n",
+    "\n",
     "if 1 < 2:\n",
-    "    warn('This inequality is true!')"
+    "    warn_user('This inequality is true!', TestUserWarning)"
    ]
   },
   {
@@ -67,7 +77,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "warning(\"This is a warning message using logging standard library.\")"
+    "logging.warning(\"This is a warning message using logging standard library.\")"
    ]
   },
   {
@@ -85,7 +95,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "error(\"Logging some (ERROR) without failing the notebook\")"
+    "logging.error(\"Logging some (ERROR) without failing the notebook\")"
    ]
   },
   {
diff --git a/setup.py b/setup.py
index b8ce52d6d..41068b4a6 100644
--- a/setup.py
+++ b/setup.py
@@ -92,6 +92,7 @@ install_requires = [
         "pymunge==0.1.3",
         "pypandoc==1.13",
         "python-dateutil==2.8.2",
+        "python-json-logger==2.0.7"
         "pyyaml==6.0.1",
         "pyzmq==25.1.2",
         "requests==2.29.0",
diff --git a/src/xfel_calibrate/setup_logging.py b/src/xfel_calibrate/setup_logging.py
index 1ecc6658f..baf0d6ae3 100644
--- a/src/xfel_calibrate/setup_logging.py
+++ b/src/xfel_calibrate/setup_logging.py
@@ -3,49 +3,96 @@ import os
 import sys
 import traceback
 import warnings
-
 import IPython
+import json
+from pythonjsonlogger import jsonlogger
+
+NOTEBOOK_NAME = os.getenv('CAL_NOTEBOOK_NAME', 'Unknown notebook')
+CURRENT_WORKING_DIR = os.getenv(
+    'CAL_WORKING_DIR', 'Unknown directory')
+JOB_ID = os.getenv('SLURM_JOB_ID', 'local')
 
 
 class ContextFilter(logging.Filter):
     def filter(self, record):
         record.notebook = NOTEBOOK_NAME
         record.directory = CURRENT_WORKING_DIR
+        record.job_id = JOB_ID
         return True
 
 
-# Define a custom log format
-formatter = logging.Formatter(
-    '%(asctime)s - %(levelname)s - %(filename)s:%(lineno)d - '
-    '[Notebook: %(notebook)s] - [Directory: %(directory)s] - %(message)s'
-)
+class CustomJsonFormatter(jsonlogger.JsonFormatter):
+    def add_fields(self, log_record, record, message_dict):
+        super(CustomJsonFormatter, self).add_fields(
+            log_record, record, message_dict)
+        log_record['timestamp'] = self.formatTime(record, self.datefmt)
+        log_record['level'] = record.levelname
+        log_record['filename'] = record.filename
+        log_record['lineno'] = record.lineno
+        log_record['class'] = getattr(record, 'class', 'DefaultClass')
+        if record.exc_info:
+            log_record['exc_info'] = self.formatException(record.exc_info)
+
+
+class JSONArrayHandler(logging.FileHandler):
+    def __init__(self, filename, mode='a', encoding=None, delay=False):
+        super().__init__(filename, mode, encoding, delay)
+        self.file_empty = True
+
+    def emit(self, record):
+        try:
+            msg = self.format(record)
+            json_msg = json.loads(msg)
+
+            with self.lock:
+                if self.stream is None:
+                    self.stream = self._open()
+
+                if self.file_empty:
+                    self.stream.write('[\n')
+                    self.file_empty = False
+                else:
+                    self.stream.write(',\n')
+
+                json.dump(json_msg, self.stream, indent=2)
+                self.stream.flush()
+        except Exception:
+            self.handleError(record)
+
+    def close(self):
+        if self.stream is not None:
+            with self.lock:
+                if not self.file_empty:
+                    self.stream.write('\n]')
+            super().close()
 
-# Retrieve notebook name and current working directory from
-# environment variables
-NOTEBOOK_NAME = os.getenv('CAL_NOTEBOOK_NAME', 'Unknown notebook')
-CURRENT_WORKING_DIR = os.getenv('CAL_WORKING_DIR', 'Unknown directory')
 
 # Create a logger
 logger = logging.getLogger()
-logger.setLevel(logging.DEBUG)  # Capture all levels
+logger.setLevel(logging.INFO)
 
-# Create file handlers
-error_handler = logging.FileHandler('errors.log')
-error_handler.setLevel(logging.ERROR)
-error_handler.setFormatter(formatter)
+# Define a custom JSON format
+formatter = CustomJsonFormatter(
+    '%(timestamp)s %(level)s %(filename)s %(lineno)d '
+    '%(notebook)s %(directory)s %(job_id)s %(class)s %(message)s')
 
-warning_handler = logging.FileHandler('warnings.log')
-warning_handler.setLevel(logging.WARNING)
-# Avoid errors being logged in warnings.log
-warning_handler.addFilter(lambda record: record.levelno < logging.ERROR)
-warning_handler.setFormatter(formatter)
 
-# In case any info is logged and for logged data
-# by other packages like calibration_client.
-info_handler = logging.FileHandler('info.log')
-info_handler.setLevel(logging.DEBUG)
-info_handler.setFormatter(formatter)
+# Function to create a file handler with job-specific JSON log file
+def create_job_specific_handler(log_level, file_suffix):
+    log_file = f'{file_suffix}_{JOB_ID}.json'
+    handler = JSONArrayHandler(log_file, mode='a+', delay=True)
+    handler.setLevel(log_level)
+    handler.setFormatter(formatter)
+    return handler
+
 
+# Create job-specific file handlers
+error_handler = create_job_specific_handler(logging.ERROR, 'errors')
+warning_handler = create_job_specific_handler(logging.WARNING, 'warnings')
+info_handler = create_job_specific_handler(logging.DEBUG, 'info')
+
+# Avoid errors being logged in warnings.json
+warning_handler.addFilter(lambda record: record.levelno < logging.ERROR)
 
 # Add the custom filter to handlers
 context_filter = ContextFilter()
@@ -68,20 +115,20 @@ def safe_handle_error(exc_type, exc_value, exc_traceback):
         traceback.print_exception(
             exc_type, exc_value, exc_traceback, file=sys.stderr)
         return
-
     handling_error = True
     try:
-        # Log the error with the notebook name and additional metadata
+        # Log the error with the notebook name, job ID, and additional metadata
         logger.error(
             "An error occurred. Exception type: %s, Message: %s",
             exc_type.__name__, str(exc_value),
             extra={
                 'notebook': NOTEBOOK_NAME,
-                'directory': CURRENT_WORKING_DIR
+                'directory': CURRENT_WORKING_DIR,
+                'job_id': JOB_ID,
+                'class': exc_type.__name__ if exc_type else 'DefaultErrorClass',  # noqa
             },
             exc_info=(exc_type, exc_value, exc_traceback)
         )
-
     except Exception as log_error:
         sys.stderr.write(f"Logging failed: {log_error}\n")
         traceback.print_exception(
@@ -92,11 +139,15 @@ def safe_handle_error(exc_type, exc_value, exc_traceback):
 
 def handle_warning(message, category, filename, lineno, file=None, line=None):
     try:
-        # Log the warning with the notebook name and additional metadata
         logger.warning(
             "Warning occurred: %s, File: %s, Line: %d",
             message, filename, lineno,
-            extra={'notebook': NOTEBOOK_NAME, 'directory': CURRENT_WORKING_DIR}
+            extra={
+                'notebook': NOTEBOOK_NAME,
+                'directory': CURRENT_WORKING_DIR,
+                'job_id': JOB_ID,
+                'class': category.__name__ if category else 'DefaultWarningClass',  # noqa
+            }
         )
     except Exception as log_error:
         sys.stderr.write(f"Logging failed: {log_error}\n")
@@ -111,4 +162,5 @@ warnings.showwarning = handle_warning
 def custom_showtraceback(self, *args, **kwargs):
     return safe_handle_error(*sys.exc_info())
 
+
 IPython.core.interactiveshell.InteractiveShell.showtraceback = custom_showtraceback  # noqa
-- 
GitLab