From fe384bb85fb022acf2fac17e133e8aa3ed763c73 Mon Sep 17 00:00:00 2001
From: Thomas Kluyver <thomas@kluyver.me.uk>
Date: Mon, 8 Jan 2024 11:43:58 +0000
Subject: [PATCH] Add script to backfill reports

---
 webservice/add_reports.py | 73 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 73 insertions(+)
 create mode 100644 webservice/add_reports.py

diff --git a/webservice/add_reports.py b/webservice/add_reports.py
new file mode 100644
index 000000000..2766fe274
--- /dev/null
+++ b/webservice/add_reports.py
@@ -0,0 +1,73 @@
+"""Backfill reports to myMdC from job databases before we started injecting them"""
+import argparse
+import os.path
+import shlex
+import sys
+import sqlite3
+
+from .config import webservice as config
+from .webservice import init_md_client
+
+ap = argparse.ArgumentParser()
+# dest must be an identifier so args.db_file works; metavar keeps "db-file" in usage
+ap.add_argument("db_file", metavar="db-file", help="SQLite job database to read")
+ap.add_argument("--really", action="store_true", help="actually add reports to myMdC")
+args = ap.parse_args()
+
+conn = sqlite3.connect(args.db_file)
+conn.row_factory = sqlite3.Row
+
+mdc = init_md_client(config)
+print("MyMdC API is:", mdc.base_api_url)
+
+# Executions whose success column is set, joined to their request via req_id
+rows = conn.execute(
+    "SELECT det_type, karabo_id, command, success, req_id, proposal, run, "
+    "action, mymdc_id, timestamp FROM executions JOIN requests "
+    "USING (req_id) WHERE success IS NOT NULL"
+).fetchall()
+
+# Count each execution whose PDF report exists; with --really, add it to myMdC
+nreports = 0
+for i, r in enumerate(rows):
+    cmd_args = shlex.split(r["command"])
+    try:
+        # The PDF is looked for at the --report-to value plus a ".pdf" suffix
+        report_path = cmd_args[cmd_args.index("--report-to") + 1] + ".pdf"
+    except (ValueError, IndexError):
+        print(f"Couldn't find report path in {cmd_args!r}")
+        continue
+
+    if not os.path.isfile(report_path):
+        print(f"Report file {report_path} missing (p{r['proposal']}, r{r['run']})")
+        continue
+
+    desc = f"{r['karabo_id']} detector corrections"
+    if not r["success"]:
+        desc += " (errors occurred)"
+
+    nreports += 1
+    if not args.really:
+        continue  # dry run: count the report but do not contact myMdC
+
+    response = mdc.create_report_api({
+        "name": os.path.basename(report_path),
+        "cal_report_path": os.path.dirname(report_path).rstrip("/") + "/",
+        "cal_report_at": r["timestamp"],
+        "run_id": r["mymdc_id"],
+        "description": desc,
+    })
+
+    if response.status_code >= 400:
+        print(
+            f"Failed to add report to MDC for run ID {r['mymdc_id']}: "
+            f"HTTP status {response.status_code}",
+        )
+    if i % 20 == 0:
+        print(f"Done {i}")
+
+print(f"Found {nreports} reports from {len(rows)} executions")
+if not args.really:
+    print("  Re-run with --really to add to myMdC")
+else:
+    print("  Injected to myMdC")
-- 
GitLab