From ed59542e873c34790d25adaed23420eb2bd9e174 Mon Sep 17 00:00:00 2001 From: ahmedk <karim.ahmed@xfel.eu> Date: Tue, 8 Mar 2022 18:56:05 +0100 Subject: [PATCH] remove source selection and only select instr sources for the n_trains check part --- .../Characterize_AGIPD_Gain_Darks_NBC.ipynb | 34 ++++++------------- 1 file changed, 11 insertions(+), 23 deletions(-) diff --git a/notebooks/AGIPD/Characterize_AGIPD_Gain_Darks_NBC.ipynb b/notebooks/AGIPD/Characterize_AGIPD_Gain_Darks_NBC.ipynb index be96463e0..32068cbac 100644 --- a/notebooks/AGIPD/Characterize_AGIPD_Gain_Darks_NBC.ipynb +++ b/notebooks/AGIPD/Characterize_AGIPD_Gain_Darks_NBC.ipynb @@ -209,43 +209,31 @@ "# Create out_folder if it doesn't exist.\n", "Path(out_folder).mkdir(parents=True, exist_ok=True)\n", "\n", - "n_files = 0\n", - "total_file_sizes = 0\n", "max_trains_list = []\n", - "\n", + "file_sizes = []\n", "for run_dict in runs_dict.values():\n", " missing_modules = []\n", - " image_dc = run_dict[\"dc\"].select(f\"{karabo_id_control}*\", \"*\", require_all=True)\n", " # This is important in case of no slurm parallelization over modules is done.\n", " # (e.g. running notebook interactively)\n", - " sources_l = [(f\"{karabo_id_control}*\", \"*\")]\n", - " sources_l += [(instrument_src.format(m), \"*\") for m in modules]\n", - " image_dc = run_dict[\"dc\"].select(sources_l, require_all=True)\n", + " dc = run_dict[\"dc\"].select(\n", + " [(instrument_src.format(m), \"*\") for m in modules],\n", + " require_all=True\n", + " )\n", " # validate that there are trains and that data sources are\n", " # present for any of the selected modules.\n", - " if (\n", - " len(image_dc.train_ids) == 0 or\n", - " not np.any([\n", - " karabo_id in s for s in run_dict[\"dc\"].select(sources_l, require_all=True).all_sources]) # noqa\n", - " ):\n", - " raise ValueError(f\"No images to process for run: {run_dict['number']}\")\n", + " n_trains = len(dc.train_ids)\n", "\n", - " max_trains_list.append(len(image_dc.train_ids))\n", + " if n_trains == 0:\n", + " raise ValueError(f\"No images to process for run: {run_dict['number']}\")\n", "\n", - " # update run_dc with selected module sources\n", - " run_dict[\"dc\"] = image_dc\n", + " max_trains_list.append(n_trains)\n", + " file_sizes += [os.path.getsize(f.filename) / 1e9 for f in dc.files]\n", "\n", "# Update modules and karabo_da lists based on available modules to processes.\n", "modules = [m for m in modules if m not in missing_modules]\n", "karabo_da = create_karabo_da_list(modules)\n", "\n", - "# Remodifing run data collections to display actual total files number and size. \n", - "for run_dict in runs_dict.values():\n", - " file_sizes = [os.path.getsize(f.filename) / 1e9 for f in run_dict[\"dc\"].deselect(f\"{karabo_id_control}*\").files]\n", - " total_file_sizes += sum(file_sizes)\n", - " n_files += len(file_sizes)\n", - "\n", - "print(f\"Will process data in a total of {n_files} files ({total_file_sizes:.02f} GB).\")" + "print(f\"Will process data in a total of {len(file_sizes)} files ({sum(file_sizes):.02f} GB).\")" ] }, { -- GitLab