diff --git a/notebooks/DSSC/Characterize_DSSC_Darks_NBC.ipynb b/notebooks/DSSC/Characterize_DSSC_Darks_NBC.ipynb index dc60b39773932ecf5690915ddc8591e4937859c5..6b3d7b71146b03ec10859f336733b040775dd0d6 100644 --- a/notebooks/DSSC/Characterize_DSSC_Darks_NBC.ipynb +++ b/notebooks/DSSC/Characterize_DSSC_Darks_NBC.ipynb @@ -28,16 +28,25 @@ "in_folder = \"/gpfs/exfel/exp/SCS/201931/p900095/raw\" # path to input data, required\n", "out_folder = \"/gpfs/exfel/data/scratch/ahmedk/test/DSSC\" # path to output to, required\n", "sequences = [0] # sequence files to evaluate.\n", - "\n", + "modules = [-1] # modules to run for\n", "run = 1497 # run number in which data was recorded, required\n", "\n", - "mem_cells = 0 # number of memory cells used, set to 0 to automatically infer\n", + "karabo_id = \"SCS_DET_DSSC1M-1\" # karabo karabo_id\n", + "karabo_da = [-1] # data aggregators\n", + "receiver_id = \"{}CH0\" # inset for receiver devices\n", + "path_template = 'RAW-R{:04d}-{}-S{:05d}.h5' # the template to use to access data\n", + "h5path = '/INSTRUMENT/{}/DET/{}:xtdf/image' # path in the HDF5 file to images\n", + "h5path_idx = '/INDEX/{}/DET/{}:xtdf/image' # path in the HDF5 file to images\n", + "\n", + "dont_use_dir_date = True # don't use the dir creation date for determining the creation time\n", + "cal_db_interface = \"tcp://max-exfl016:8020\" # the database interface to use\n", + "cal_db_timeout = 3000000 # timeout on caldb requests\"\n", "local_output = True # output constants locally\n", "db_output = False # output constants to database\n", + "\n", + "mem_cells = 0 # number of memory cells used, set to 0 to automatically infer\n", "bias_voltage = 300 # detector bias voltage\n", - "cal_db_interface = \"tcp://max-exfl016:8020\" # the database interface to use\n", "rawversion = 2 # RAW file format version\n", - "dont_use_dir_date = True # don't use the dir creation date for determining the creation time\n", "\n", "thresholds_offset_sigma = 3. # thresholds in terms of n sigma noise for offset deduced bad pixels\n", "thresholds_offset_hard = [4000, 8500] # thresholds in absolute ADU terms for offset deduced bad pixels\n", @@ -46,8 +55,7 @@ "thresholds_noise_hard = [4, 20] # thresholds in absolute ADU terms for offset deduced bad pixels\n", "\n", "instrument = \"SCS\" # the instrument\n", - "high_res_badpix_3d = False # set this to True if you need high-resolution 3d bad pixel plots. Runtime: ~ 1h\n", - "modules = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15] # modules to run for" + "high_res_badpix_3d = False # set this to True if you need high-resolution 3d bad pixel plots. Runtime: ~ 1h" ] }, { @@ -74,7 +82,7 @@ "import matplotlib.pyplot as plt\n", "%matplotlib inline\n", "\n", - "from cal_tools.tools import (gain_map_files, parse_runs, run_prop_seq_from_path, \n", + "from cal_tools.tools import (map_gain_stages, parse_runs, run_prop_seq_from_path, \n", " get_notebook_name, get_dir_creation_date,\n", " get_random_db_interface)\n", "from cal_tools.influx import InfluxLogger\n", @@ -90,16 +98,21 @@ "from iCalibrationDB import ConstantMetaData, Constants, Conditions, Detectors, Versions\n", "\n", "\n", - "# no need to change this\n", + "h5path = h5path.format(karabo_id, receiver_id)\n", + "h5path_idx = h5path_idx.format(karabo_id, receiver_id)\n", "\n", - "QUADRANTS = 4\n", - "MODULES_PER_QUAD = 4\n", - "DET_FILE_INSET = \"DSSC\"\n", "\n", + "if karabo_da[0] == -1:\n", + " if modules[0] == -1:\n", + " modules = list(range(16))\n", + " karabo_da = [\"DSSC{:02d}\".format(i) for i in modules]\n", + "else:\n", + " modules = [int(x[-2:]) for x in karabo_da]\n", + " \n", "max_cells = mem_cells\n", " \n", "offset_runs = OrderedDict()\n", - "offset_runs[\"high\"] = parse_runs(run)[0]\n", + "offset_runs[\"high\"] = run\n", "\n", "creation_time=None\n", "if not dont_use_dir_date:\n", @@ -110,14 +123,11 @@ "logger = InfluxLogger(detector=\"DSSC\", instrument=instrument, mem_cells=mem_cells,\n", " notebook=get_notebook_name(), proposal=prop)\n", "\n", - "print(\"Using {} as creation time of constant.\".format(creation_time))\n", - "\n", - "loc = None\n", - "if instrument == \"SCS\":\n", - " loc = \"SCS_DET_DSSC1M-1\"\n", - " dinstance = \"DSSC1M1\"\n", + "print(f\"Using {creation_time} as creation time of constant.\")\n", + " \n", + "dinstance = \"DSSC1M1\"\n", "\n", - "print(\"Detector in use is {}\".format(loc)) \n", + "print(f\"Detector in use is {karabo_id}\") \n", "\n", "cal_db_interface = get_random_db_interface(cal_db_interface)" ] @@ -134,14 +144,14 @@ "outputs": [], "source": [ "print(\"Parameters are:\")\n", - "print(\"Proposal: {}\".format(prop))\n", - "print(\"Memory cells: {}/{}\".format(mem_cells, max_cells))\n", + "print(f\"Proposal: {prop}\")\n", + "print(f\"Memory cells: {mem_cells}/{max_cells}\")\n", "print(\"Runs: {}\".format([ v for v in offset_runs.values()]))\n", - "print(\"Sequences: {}\".format(sequences))\n", - "print(\"Using DB: {}\".format(db_output))\n", - "print(\"Input: {}\".format(in_folder))\n", - "print(\"Output: {}\".format(out_folder))\n", - "print(\"Bias voltage: {}V\".format(bias_voltage))" + "print(f\"Sequences: {sequences}\")\n", + "print(f\"Using DB: {db_output}\")\n", + "print(f\"Input: {in_folder}\")\n", + "print(f\"Output: {out_folder}\")\n", + "print(f\"Bias voltage: {bias_voltage}V\")" ] }, { @@ -163,12 +173,10 @@ "outputs": [], "source": [ "# set everything up filewise\n", - "if not os.path.exists(out_folder):\n", - " os.makedirs(out_folder)\n", - "\n", - "gmf = gain_map_files(in_folder, offset_runs, sequences, DET_FILE_INSET, QUADRANTS, MODULES_PER_QUAD)\n", + "os.makedirs(out_folder, exist_ok=True)\n", + "gmf = map_gain_stages(in_folder, offset_runs, path_template, karabo_da, sequences)\n", "gain_mapped_files, total_sequences, total_file_size = gmf\n", - "print(\"Will process at total of {} sequences: {:0.2f} GB of data.\".format(total_sequences, total_file_size))" + "print(f\"Will process a total of {total_sequences} sequences.\")" ] }, { @@ -193,7 +201,7 @@ "source": [ "import copy\n", "from functools import partial\n", - "def characterize_module(cells, bp_thresh, rawversion, loc, inp):\n", + "def characterize_module(cells, bp_thresh, rawversion, karabo_id, h5path, h5path_idx, inp):\n", " import numpy as np\n", " import copy\n", " import h5py\n", @@ -203,10 +211,10 @@ " import struct\n", " import binascii\n", " \n", - " def get_num_cells(fname, loc, module):\n", + " def get_num_cells(fname, h5path):\n", " with h5py.File(fname, \"r\") as f:\n", "\n", - " cells = f[\"INSTRUMENT/{}/DET/{}CH0:xtdf/image/cellId\".format(loc, module)][()]\n", + " cells = f[f\"{h5path}/cellId\"][()]\n", " maxcell = np.max(cells)\n", " options = [100, 200, 400, 500, 600, 700, 800]\n", " dists = np.array([(o-maxcell) for o in options])\n", @@ -215,9 +223,11 @@ " \n", " filename, filename_out, channel = inp\n", " \n", - "\n", + " h5path = h5path.format(channel)\n", + " h5path_idx = h5path_idx.format(channel)\n", + " \n", " if cells == 0:\n", - " cells = get_num_cells(filename, loc, channel)\n", + " cells = get_num_cells(filename, h5path)\n", "\n", " pulseid_checksum = None\n", "\n", @@ -225,23 +235,23 @@ "\n", " infile = h5py.File(filename, \"r\", driver=\"core\")\n", " if rawversion == 2:\n", - " count = np.squeeze(infile[\"/INDEX/{}/DET/{}CH0:xtdf/image/count\".format(loc, channel)])\n", - " first = np.squeeze(infile[\"/INDEX/{}/DET/{}CH0:xtdf/image/first\".format(loc, channel)])\n", + " count = np.squeeze(infile[f\"{h5path_idx}/count\"])\n", + " first = np.squeeze(infile[f\"{h5path_idx}/first\"])\n", " last_index = int(first[count != 0][-1]+count[count != 0][-1])\n", " first_index = int(first[count != 0][0])\n", - " pulseids = infile[\"INSTRUMENT/{}/DET/{}CH0:xtdf/image/pulseId\".format(loc, channel)][first_index:int(first[count != 0][1])]\n", + " pulseids = infile[f\"{h5path}/pulseId\"][first_index:int(first[count != 0][1])]\n", " bveto = blake2b(pulseids.data, digest_size=8)\n", " pulseid_checksum = struct.unpack('d', binascii.unhexlify(bveto.hexdigest()))[0]\n", " else:\n", - " status = np.squeeze(infile[\"/INDEX/{}/DET/{}CH0:xtdf/image/status\".format(loc, channel)])\n", + " status = np.squeeze(infile[f\"{h5path_idx}/status\"])\n", " if np.count_nonzero(status != 0) == 0:\n", " return\n", - " last = np.squeeze(infile[\"/INDEX/{}/DET/{}CH0:xtdf/image/last\".format(loc, channel)])\n", - " first = np.squeeze(infile[\"/INDEX/{}/DET/{}CH0:xtdf/image/first\".format(loc, channel)])\n", + " last = np.squeeze(infile[f\"{h5path_idx}/last\"])\n", + " first = np.squeeze(infile[f\"{h5path_idx}/first\"])\n", " last_index = int(last[status != 0][-1]) + 1\n", " first_index = int(first[status != 0][0])\n", - " im = np.array(infile[\"/INSTRUMENT/{}/DET/{}CH0:xtdf/image/data\".format(loc, channel)][first_index:last_index,...]) \n", - " cellIds = np.squeeze(infile[\"/INSTRUMENT/{}/DET/{}CH0:xtdf/image/cellId\".format(loc, channel)][first_index:last_index,...]) \n", + " im = np.array(infile[f\"{h5path}/data\"][first_index:last_index,...]) \n", + " cellIds = np.squeeze(infile[f\"{h5path}/cellId\"][first_index:last_index,...]) \n", " \n", " infile.close()\n", "\n", @@ -311,7 +321,7 @@ " first = False\n", " p = partial(characterize_module, max_cells,\n", " (thresholds_offset_hard, thresholds_offset_sigma,\n", - " thresholds_noise_hard, thresholds_noise_sigma), rawversion, loc)\n", + " thresholds_noise_hard, thresholds_noise_sigma), rawversion, karabo_id, h5path, h5path_idx)\n", " results = list(map(p, inp))\n", " \n", " for ii, r in enumerate(results):\n", @@ -337,7 +347,7 @@ " filesize=total_file_size)\n", "logger.send()\n", "max_cells = np.max(all_cells)\n", - "print(\"Using {} memory cells\".format(max_cells))" + "print(f\"Using {max_cells} memory cells\")" ] }, { @@ -361,7 +371,7 @@ " \n", "if local_output:\n", " for qm in offset_g.keys():\n", - " ofile = \"{}/dssc_offset_store_{}_{}.h5\".format(out_folder, \"_\".join(offset_runs.values()), qm)\n", + " ofile = \"{}/dssc_offset_store_{}_{}.h5\".format(out_folder, run, qm)\n", " store_file = h5py.File(ofile, \"w\")\n", " store_file[\"{}/Offset/0/data\".format(qm)] = offset_g[qm]\n", " store_file[\"{}/Noise/0/data\".format(qm)] = noise_g[qm]\n", @@ -403,7 +413,7 @@ " metadata.calibration_constant_version = Versions.Now(device=device)\n", " else:\n", " metadata.calibration_constant_version = Versions.Timespan(device=device, start=creation_time)\n", - " metadata.send(cal_db_interface, timeout=3000000)\n", + " metadata.send(cal_db_interface, timeout=cal_db_timeout)\n", "\n", "\n", " metadata = ConstantMetaData()\n", @@ -421,7 +431,7 @@ " metadata.calibration_constant_version = Versions.Now(device=device)\n", " else:\n", " metadata.calibration_constant_version = Versions.Timespan(device=device, start=creation_time)\n", - " metadata.send(cal_db_interface, timeout=3000000)\n", + " metadata.send(cal_db_interface, timeout=cal_db_timeout)\n", "\n", " continue # no bad pixels yet\n", " metadata = ConstantMetaData()\n", @@ -439,7 +449,7 @@ " metadata.calibration_constant_version = Versions.Now(device=device)\n", " else:\n", " metadata.calibration_constant_version = Versions.Timespan(device=device, start=creation_time)\n", - " metadata.send(cal_db_interface, timeout=3000000)\n", + " metadata.send(cal_db_interface, timeout=cal_db_timeout)\n", " except Exception as e:\n", " print(e)" ] @@ -478,7 +488,7 @@ "cell = 9\n", "gain = 0\n", "out_folder = None\n", - "show_overview(res, cell, gain, out_folder=out_folder, infix=\"_\".join(offset_runs.values()))" + "show_overview(res, cell, gain, out_folder=out_folder, infix=\"_{}\".format(run))" ] }, { @@ -528,7 +538,7 @@ "outputs": [], "source": [ "create_constant_overview(offset_g, \"Offset (ADU)\", max_cells,\n", - " out_folder=out_folder, infix=\"_\".join(offset_runs.values()), entries=1)" + " out_folder=out_folder, infix=\"_{}\".format(run), entries=1)" ] }, { @@ -540,7 +550,7 @@ "outputs": [], "source": [ "create_constant_overview(noise_g, \"Noise (ADU)\", max_cells, 0, 100,\n", - " out_folder=out_folder, infix=\"_\".join(offset_runs.values()), entries=1)" + " out_folder=out_folder, infix=\"_{}\".format(run), entries=1)" ] }, { @@ -553,7 +563,7 @@ "for m, d in badpix_g.items():\n", " bad_pixel_aggregate_g[m] = d.astype(np.bool).astype(np.float)\n", "create_constant_overview(bad_pixel_aggregate_g, \"Bad pixel fraction\", max_cells, entries=1,\n", - " out_folder=out_folder, infix=\"_\".join(offset_runs.values()))" + " out_folder=out_folder, infix=\"_{}\".format(run))" ] }, { @@ -587,7 +597,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.7" + "version": "3.7.6" } }, "nbformat": 4, diff --git a/notebooks/DSSC/DSSC_Correct_and_Verify.ipynb b/notebooks/DSSC/DSSC_Correct_and_Verify.ipynb index f44b05119a3b2d2f170cd72a2e0a51049e2334e7..79fa9bf4fd768c06fa6607abfea6bb7b6d85c99d 100644 --- a/notebooks/DSSC/DSSC_Correct_and_Verify.ipynb +++ b/notebooks/DSSC/DSSC_Correct_and_Verify.ipynb @@ -22,21 +22,30 @@ }, "outputs": [], "source": [ - "in_folder = \"/gpfs/exfel/exp/SCS/201931/p900095/raw/\" # the folder to read data from, required\n", - "run = 1520 #runs to process, required\n", - "out_folder = \"/gpfs/exfel/data/scratch/ahmedk/test/DSSC\" # the folder to output to, required\n", - "sequences = [-1] # sequences to correct, set to -1 for all, range allowed\n", + "cluster_profile = \"noDB\" # The ipcluster profile to use\n", + "in_folder = \"/gpfs/exfel/exp/CALLAB/202031/p900113/raw\" # path to input data, required\n", + "out_folder = \"/gpfs/exfel/data/scratch/karnem/test/DSSC\" # path to output to, required\n", + "sequences = [-1] # sequence files to evaluate.\n", + "modules = [-1] # modules to correct, set to -1 for all, range allowed\n", + "run = 9987 #runs to process, required\n", + "\n", + "karabo_id = \"SCS_DET_DSSC1M-1\" # karabo karabo_id\n", + "karabo_da = [-1] # data aggregators\n", + "receiver_id = \"{}CH0\" # inset for receiver devices\n", + "path_template = 'RAW-R{:04d}-{}-S{:05d}.h5' # the template to use to access data\n", + "h5path = 'INSTRUMENT/{}/DET/{}:xtdf/image' # path in the HDF5 file to images\n", + "h5path_idx = '/INDEX/{}/DET/{}:xtdf/image' # path in the HDF5 file to images\n", + "\n", + "use_dir_creation_date = True # use the creation data of the input dir for database queries\n", + "cal_db_interface = \"tcp://max-exfl017:8020#8025\" # the database interface to use\n", + "cal_db_timeout = 300000 # in milli seconds\n", + "\n", "mem_cells = 0 # number of memory cells used, set to 0 to automatically infer\n", "overwrite = True # set to True if existing data should be overwritten\n", - "cluster_profile = \"noDB\" # cluster profile to use\n", "max_pulses = 500 # maximum number of pulses per train\n", "bias_voltage = 100 # detector bias voltage\n", - "cal_db_interface = \"tcp://max-exfl016:8020#8025\" # the database interface to use\n", - "use_dir_creation_date = True # use the creation data of the input dir for database queries\n", "sequences_per_node = 1 # number of sequence files per cluster node if run as slurm job, set to 0 to not run SLURM parallel\n", - "cal_db_timeout = 300000 # in milli seconds\n", "chunk_size_idim = 1 # chunking size of imaging dimension, adjust if user software is sensitive to this.\n", - "instrument = \"SCS\" # the instrument the detector is installed at, required\n", "mask_noisy_asic = 0.25 # set to a value other than 0 and below 1 to mask entire ADC if fraction of noisy pixels is above\n", "offset_image = \"PP\" # last one\n", "mask_cold_asic = 0.25 # mask cold ASICS if number of pixels with negligable standard deviation is larger than this fraction\n", @@ -44,25 +53,9 @@ "geo_file = \"/gpfs/exfel/data/scratch/xcal/dssc_geo_june19.h5\" # detector geometry file\n", "dinstance = \"DSSC1M1\"\n", "\n", - "def balance_sequences(in_folder, run, sequences, sequences_per_node):\n", - " import glob\n", - " import re\n", - " import numpy as np\n", - " if sequences[0] == -1:\n", - " sequence_files = glob.glob(\"{}/r{:04d}/*-S*.h5\".format(in_folder, run))\n", - " seq_nums = set()\n", - " for sf in sequence_files:\n", - " seqnum = re.findall(r\".*-S([0-9]*).h5\", sf)[0]\n", - " seq_nums.add(int(seqnum))\n", - " seq_nums -= set(sequences)\n", - " else:\n", - " seq_nums = set(sequences)\n", - " nsplits = len(seq_nums)//sequences_per_node+1\n", - " while nsplits > 32:\n", - " sequences_per_node += 1\n", - " nsplits = len(seq_nums)//sequences_per_node+1\n", - " print(\"Changed to {} sequences per node to have a maximum of 8 concurrent jobs\".format(sequences_per_node))\n", - " return [l.tolist() for l in np.array_split(list(seq_nums), nsplits) if l.size > 0]\n", + "def balance_sequences(in_folder, run, sequences, sequences_per_node, karabo_da):\n", + " from xfel_calibrate.calibrate import balance_sequences as bs\n", + " return bs(in_folder, run, sequences, sequences_per_node, karabo_da)\n", " " ] }, @@ -89,12 +82,12 @@ "import matplotlib.pyplot as plt\n", "from ipyparallel import Client\n", "from IPython.display import display, Markdown, Latex\n", - "print(\"Connecting to profile {}\".format(cluster_profile))\n", + "print(f\"Connecting to profile {cluster_profile}\")\n", "view = Client(profile=cluster_profile)[:]\n", "view.use_dill()\n", "\n", "from iCalibrationDB import ConstantMetaData, Constants, Conditions, Detectors, Versions\n", - "from cal_tools.tools import (gain_map_files, parse_runs, run_prop_seq_from_path, get_notebook_name,\n", + "from cal_tools.tools import (map_modules_from_folder, parse_runs, run_prop_seq_from_path, get_notebook_name,\n", " get_dir_creation_date, get_constant_from_db)\n", "\n", "from dateutil import parser\n", @@ -104,24 +97,30 @@ "creation_time = None\n", "if use_dir_creation_date:\n", " creation_time = get_dir_creation_date(in_folder, run)\n", - " print(\"Using {} as creation time\".format(creation_time))\n", - "\n", - "in_folder = \"{}/r{:04d}\".format(in_folder, run)\n", - "\n", + " print(f\"Using {creation_time} as creation time\")\n", "\n", "if sequences[0] == -1:\n", " sequences = None\n", " \n", + "h5path = h5path.format(karabo_id, receiver_id)\n", + "h5path_idx = h5path_idx.format(karabo_id, receiver_id)\n", + "\n", + "\n", + "if karabo_da[0] == -1:\n", + " if modules[0] == -1:\n", + " modules = list(range(16))\n", + " karabo_da = [\"DSSC{:02d}\".format(i) for i in modules]\n", + "else:\n", + " modules = [int(x[-2:]) for x in karabo_da]\n", + "print(\"Process modules: \", \n", + " ', '.join([f\"Q{x // 4 + 1}M{x % 4 + 1}\" for x in modules]))\n", "\n", - "QUADRANTS = 4\n", - "MODULES_PER_QUAD = 4\n", - "DET_FILE_INSET = \"DSSC\"\n", "CHUNK_SIZE = 512\n", "MAX_PAR = 32\n", "\n", "if in_folder[-1] == \"/\":\n", " in_folder = in_folder[:-1]\n", - "print(\"Outputting to {}\".format(out_folder))\n", + "print(f\"Outputting to {out_folder}\")\n", "\n", "if not os.path.exists(out_folder):\n", " os.makedirs(out_folder)\n", @@ -131,12 +130,7 @@ "import warnings\n", "warnings.filterwarnings('ignore')\n", "\n", - "\n", - "loc = None\n", - "if instrument == \"SCS\":\n", - " loc = \"SCS_DET_DSSC1M-1\"\n", - " dinstance = \"DSSC1M1\"\n", - "print(\"Detector in use is {}\".format(loc)) \n", + "print(f\"Detector in use is {karabo_id}\") \n", "\n", "if offset_image.upper() != \"PP\":\n", " offset_image = int(offset_image)" @@ -225,50 +219,8 @@ "outputs": [], "source": [ "# set everything up filewise\n", - "from queue import Queue\n", - "from collections import OrderedDict\n", - "\n", - "def map_modules_from_files(filelist):\n", - " module_files = OrderedDict()\n", - " mod_ids = OrderedDict()\n", - " total_sequences = 0\n", - " sequences_qm = {}\n", - " one_module = None\n", - " for quadrant in range(0, QUADRANTS):\n", - " for module in range(0, MODULES_PER_QUAD):\n", - " name = \"Q{}M{}\".format(quadrant + 1, module + 1)\n", - " module_files[name] = Queue()\n", - " num = quadrant * 4 + module\n", - " mod_ids[name] = num\n", - " file_infix = \"{}{:02d}\".format(DET_FILE_INSET, num)\n", - " sequences_qm[name] = 0\n", - " for file in filelist:\n", - " if file_infix in file:\n", - " if not one_module:\n", - " one_module = file, num\n", - " module_files[name].put(file)\n", - " total_sequences += 1\n", - " sequences_qm[name] += 1\n", - " \n", - " return module_files, mod_ids, total_sequences, sequences_qm, one_module\n", - "\n", - "dirlist = sorted(os.listdir(in_folder))\n", - "file_list = []\n", - "\n", - "\n", - "for entry in dirlist:\n", - " #only h5 file\n", - " abs_entry = \"{}/{}\".format(in_folder, entry)\n", - " if os.path.isfile(abs_entry) and os.path.splitext(abs_entry)[1] == \".h5\":\n", - " \n", - " if sequences is None:\n", - " file_list.append(abs_entry)\n", - " else:\n", - " for seq in sequences:\n", - " if \"{:05d}.h5\".format(seq) in abs_entry:\n", - " file_list.append(os.path.abspath(abs_entry))\n", - " \n", - "mapped_files, mod_ids, total_sequences, sequences_qm, one_module = map_modules_from_files(file_list)\n", + "mmf = map_modules_from_folder(in_folder, run, path_template, karabo_da, sequences)\n", + "mapped_files, mod_ids, total_sequences, sequences_qm, file_size = mmf\n", "MAX_PAR = min(MAX_PAR, total_sequences)" ] }, @@ -293,7 +245,7 @@ "import copy\n", "from IPython.display import HTML, display, Markdown, Latex\n", "import tabulate\n", - "print(\"Processing a total of {} sequence files in chunks of {}\".format(total_sequences, MAX_PAR))\n", + "print(f\"Processing a total of {total_sequences} sequence files in chunks of {MAX_PAR}\")\n", "table = []\n", "mfc = copy.copy(mapped_files)\n", "ti = 0\n", @@ -310,7 +262,8 @@ "if len(table):\n", " md = display(Latex(tabulate.tabulate(table, tablefmt='latex', headers=[\"#\", \"module\", \"# module\", \"file\"]))) \n", "# restore the queue\n", - "mapped_files, mod_ids, total_sequences, sequences_qm, one_module = map_modules_from_files(file_list)" + "mmf = map_modules_from_folder(in_folder, run, path_template, karabo_da, sequences)\n", + "mapped_files, mod_ids, total_sequences, sequences_qm, file_size = mmf" ] }, { @@ -326,9 +279,10 @@ "source": [ "import copy\n", "from functools import partial\n", - "def correct_module(total_sequences, sequences_qm, loc, dinstance, offset_image,\n", + "def correct_module(total_sequences, sequences_qm, karabo_id, dinstance, offset_image,\n", " mask_noisy_asic, mask_cold_asic, noisy_pix_threshold, chunksize,\n", - " mem_cells, bias_voltage, cal_db_timeout, creation_time, cal_db_interface, inp):\n", + " mem_cells, bias_voltage, cal_db_timeout, creation_time, cal_db_interface,\n", + " h5path, h5path_idx, inp):\n", " import numpy as np\n", " import copy\n", " import h5py\n", @@ -341,8 +295,15 @@ " import binascii\n", " \n", " filename, filename_out, channel, qm = inp\n", - " h5path = \"INSTRUMENT/{}/DET/{}CH0:xtdf/\".format(loc, channel)\n", - " h5path_idx = \"INDEX/{}/DET/{}CH0:xtdf/\".format(loc, channel)\n", + " \n", + " # DSSC correction requires path without the leading \"/\"\n", + " if h5path[0] == '/':\n", + " h5path = h5path[1:]\n", + " if h5path_idx[0] == '/':\n", + " h5path_idx = h5path_idx[1:]\n", + "\n", + " h5path = h5path.format(channel)\n", + " h5path_idx = h5path_idx.format(channel)\n", " \n", " low_edges = None\n", " hists_signal_low = None\n", @@ -351,34 +312,33 @@ " pulse_edges = None\n", " err = None\n", " offset_not_found = False\n", - " def get_num_cells(fname, loc, module):\n", + " def get_num_cells(fname, h5path):\n", " with h5py.File(fname, \"r\") as f:\n", "\n", - " cells = f[\"INSTRUMENT/{}/DET/{}CH0:xtdf/image/cellId\".format(loc, module)][()]\n", + " cells = f[f\"{h5path}/cellId\"][()]\n", " maxcell = np.max(cells)\n", " options = [100, 200, 400, 500, 600, 700, 800]\n", " dists = np.array([(o-maxcell) for o in options])\n", " dists[dists<0] = 10000 # assure to always go higher\n", " return options[np.argmin(dists)]\n", " \n", - " def get_checksum(fname, loc, module):\n", + " def get_checksum(fname, h5path, h5path_idx):\n", " with h5py.File(fname, \"r\") as infile:\n", - " count = np.squeeze(infile[\"/INDEX/{}/DET/{}CH0:xtdf/image/count\".format(loc, channel)])\n", - " first = np.squeeze(infile[\"/INDEX/{}/DET/{}CH0:xtdf/image/first\".format(loc, channel)])\n", + " count = np.squeeze(infile[f\"{h5path_idx}/count\"])\n", + " first = np.squeeze(infile[f\"{h5path_idx}/first\"])\n", " last_index = int(first[count != 0][-1]+count[count != 0][-1])\n", " first_index = int(first[count != 0][0])\n", - " pulseids = infile[\"INSTRUMENT/{}/DET/{}CH0:xtdf/image/pulseId\".format(loc, channel)][first_index:int(first[count != 0][1])]\n", + " pulseids = infile[f\"{h5path}/pulseId\"][first_index:int(first[count != 0][1])]\n", " bveto = blake2b(pulseids.data, digest_size=8)\n", " pulseid_checksum = struct.unpack('d', binascii.unhexlify(bveto.hexdigest()))[0]\n", " return pulseid_checksum\n", " \n", - "\n", " if mem_cells == 0:\n", - " mem_cells = get_num_cells(filename, loc, channel)\n", + " mem_cells = get_num_cells(filename, h5path)\n", " \n", - " pulseid_checksum = get_checksum(filename, loc, channel)\n", + " pulseid_checksum = get_checksum(filename, h5path, h5path_idx)\n", " \n", - " print(\"Memcells: {}\".format(mem_cells))\n", + " print(f\"Memcells: {mem_cells}\")\n", " \n", " condition = Conditions.Dark.DSSC(bias_voltage=bias_voltage, memory_cells=mem_cells,\n", " pulseid_checksum=pulseid_checksum)\n", @@ -387,8 +347,8 @@ " detinst = getattr(Detectors, dinstance)\n", " device = getattr(detinst, qm)\n", " with h5py.File(filename, \"r\", driver=\"core\") as infile:\n", - " y = infile[h5path+\"image/data\"].shape[2]\n", - " x = infile[h5path+\"image/data\"].shape[3]\n", + " y = infile[f\"{h5path}/data\"].shape[2]\n", + " x = infile[f\"{h5path}/data\"].shape[3]\n", " offset, when = get_constant_from_db_and_time(device,\n", " Constants.DSSC.Offset(),\n", " condition,\n", @@ -408,7 +368,7 @@ " def copy_and_sanitize_non_cal_data(infile, outfile):\n", " # these are touched in the correct function, do not copy them here\n", " dont_copy = [\"data\"]\n", - " dont_copy = [h5path + \"image/{}\".format(do)\n", + " dont_copy = [h5path + \"/{}\".format(do)\n", " for do in dont_copy]\n", "\n", " # a visitor to copy everything else\n", @@ -429,19 +389,19 @@ " with h5py.File(filename_out, \"w\") as outfile:\n", " copy_and_sanitize_non_cal_data(infile, outfile)\n", " # get indices of last images in each train\n", - " first_arr = np.squeeze(infile[h5path_idx+\"image/first\"]).astype(np.int)\n", + " first_arr = np.squeeze(infile[f\"{h5path_idx}/first\"]).astype(np.int)\n", " last_arr = np.concatenate((first_arr[1:], np.array([-1,]))).astype(np.int)\n", " assert first_arr.size == last_arr.size\n", - " oshape = list(infile[h5path+\"image/data\"].shape)\n", + " oshape = list(infile[f\"{h5path}/data\"].shape)\n", " if len(oshape) == 4:\n", " oshape = [oshape[0],]+oshape[2:]\n", " chunks = (chunksize, oshape[1], oshape[2])\n", - " ddset = outfile.create_dataset(h5path + \"image/data\",\n", + " ddset = outfile.create_dataset(f\"{h5path}/data\",\n", " oshape, chunks=chunks,\n", " dtype=np.float32,\n", " fletcher32=True)\n", "\n", - " mdset = outfile.create_dataset(h5path + \"image/mask\",\n", + " mdset = outfile.create_dataset(f\"{h5path}/mask\",\n", " oshape, chunks=chunks,\n", " dtype=np.uint32,\n", " compression=\"gzip\",\n", @@ -452,9 +412,9 @@ " for train in range(first_arr.size):\n", " first = first_arr[train]\n", " last = last_arr[train]\n", - " data = np.squeeze(infile[h5path+\"image/data\"][first:last, ...].astype(np.float32))\n", - " cellId = np.squeeze(infile[h5path+\"image/cellId\"][first:last, ...])\n", - " pulseId = np.squeeze(infile[h5path+\"image/pulseId\"][first:last, ...])\n", + " data = np.squeeze(infile[f\"{h5path}/data\"][first:last, ...].astype(np.float32))\n", + " cellId = np.squeeze(infile[f\"{h5path}/cellId\"][first:last, ...])\n", + " pulseId = np.squeeze(infile[f\"{h5path}/pulseId\"][first:last, ...])\n", "\n", " if offset_image != \"PP\" and offset_not_found:\n", " data -= data[offset_image, ...]\n", @@ -479,7 +439,7 @@ " # data[data < 0] = 0\n", " ddset[first:last, ...] = data\n", " # find static and noisy values in dark images\n", - " data = infile[h5path+\"image/data\"][last, ...].astype(np.float32)\n", + " data = infile[f\"{h5path}/data\"][last, ...].astype(np.float32)\n", " bpix = np.zeros(oshape[1:], np.uint32)\n", " dark_std = np.std(data, axis=0)\n", " bpix[dark_std > noisy_pix_threshold] = BadPixels.NOISE_OUT_OF_THRESHOLD.value\n", @@ -539,21 +499,22 @@ " fname_in = str(mapped_files[qm].get())\n", " dones.append(mapped_files[qm].empty())\n", " else:\n", - " print(\"{} file is missing\".format(qm))\n", + " print(f\"{qm} file is missing\")\n", " continue\n", " else:\n", - " print(\"Skipping {}\".format(qm))\n", + " print(f\"Skipping {qm}\")\n", " continue\n", " fout = os.path.abspath(\"{}/{}\".format(out_folder, (os.path.split(fname_in)[-1]).replace(\"RAW\", \"CORR\")))\n", " first_files.append((i, fname_in, fout))\n", " inp.append((fname_in, fout, i, qm))\n", "\n", " if len(inp) >= min(MAX_PAR, left):\n", - " print(\"Running {} tasks parallel\".format(len(inp)))\n", + " print(f\"Running {len(inp)} tasks parallel\")\n", " p = partial(correct_module, total_sequences, sequences_qm,\n", - " loc, dinstance, offset_image, mask_noisy_asic,\n", + " karabo_id, dinstance, offset_image, mask_noisy_asic,\n", " mask_cold_asic, noisy_pix_threshold, chunk_size_idim,\n", - " mem_cells, bias_voltage, cal_db_timeout, creation_time, cal_db_interface)\n", + " mem_cells, bias_voltage, cal_db_timeout, creation_time, cal_db_interface,\n", + " h5path, h5path_idx)\n", "\n", " r = view.map_sync(p, inp)\n", " #r = list(map(p, inp))\n", @@ -583,12 +544,12 @@ " if Errors[i] is not None:\n", "\n", " # Avoid writing wrong injection date if cons. not found.\n", - " if \"not found\" in Errors[i]:\n", - " print(\"ERROR! {}: {}\".format(qm, Errors[i]))\n", + " if \"not found\" in str(Errors[i]):\n", + " print(f\"ERROR! {qm}: {Errors[i]}\")\n", " else:\n", - " print(\"Offset for {} was injected on {}, ERROR!: {}\".format(qm, when, Errors[i]))\n", + " print(f\"Offset for {qm} was injected on {when}, ERROR!: {Errors[i]}\")\n", " else:\n", - " print(\"Offset for {} was injected on {}\".format(qm, when))" + " print(f\"Offset for {qm} was injected on {when}\")" ] }, { @@ -698,13 +659,13 @@ "for ff in first_files:\n", " try:\n", " channel, raw_file, corr_file = ff\n", + " data_path = h5path.format(channel)\n", + " index_path = h5path_idx.format(channel)\n", " try:\n", " infile = h5py.File(raw_file, \"r\")\n", - " first_idx = int(np.array(infile[\"/INDEX/{}/DET/{}CH0:xtdf/image/first\"\n", - " .format(loc, channel)])[0])\n", + " first_idx = int(np.array(infile[f\"{index_path}/first\"])[0])\n", " \n", - " raw_d = np.array(infile[\"/INSTRUMENT/{}/DET/{}CH0:xtdf/image/data\"\n", - " .format(loc, channel)])\n", + " raw_d = np.array(infile[f\"{data_path}/data\"])\n", " # Use first 128 images for plotting\n", " if raw_d.shape[0] >= 128:\n", " # random number for plotting\n", @@ -718,14 +679,10 @@ " \n", " infile = h5py.File(corr_file, \"r\")\n", " try:\n", - " corrected.append(np.array(infile[\"/INSTRUMENT/{}/DET/{}CH0:xtdf/image/data\"\n", - " .format(loc, channel)][first_idx:last_idx,...]))\n", - " mask.append(np.array(infile[\"/INSTRUMENT/{}/DET/{}CH0:xtdf/image/mask\"\n", - " .format(loc, channel)][first_idx:last_idx,...]))\n", - " pulse_ids.append(np.squeeze(infile[\"/INSTRUMENT/{}/DET/{}CH0:xtdf/image/pulseId\"\n", - " .format(loc, channel)][first_idx:last_idx,...]))\n", - " train_ids.append(np.squeeze(infile[\"/INSTRUMENT/{}/DET/{}CH0:xtdf/image/trainId\"\n", - " .format(loc, channel)][first_idx:last_idx,...]))\n", + " corrected.append(np.array(infile[f\"{data_path}/data\"][first_idx:last_idx,...]))\n", + " mask.append(np.array(infile[f\"{data_path}/mask\"][first_idx:last_idx,...]))\n", + " pulse_ids.append(np.squeeze(infile[f\"{data_path}/pulseId\"][first_idx:last_idx,...]))\n", + " train_ids.append(np.squeeze(infile[f\"{data_path}/trainId\"][first_idx:last_idx,...]))\n", " finally:\n", " infile.close()\n", " \n", @@ -1042,20 +999,6 @@ " vmax=1, cmap=\"jet\")\n", "cb = fig.colorbar(im, ax=ax)" ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { @@ -1074,7 +1017,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.7" + "version": "3.7.6" } }, "nbformat": 4,