From b62970eda01481fb764e1c61ba6ecbb42b4781fd Mon Sep 17 00:00:00 2001
From: ahmedk <karim.ahmed@xfel.eu>
Date: Fri, 3 Sep 2021 20:23:54 +0200
Subject: [PATCH] started to remove h5py

---
 notebooks/pnCCD/Correct_pnCCD_NBC.ipynb | 439 ++++++++++++------------
 1 file changed, 219 insertions(+), 220 deletions(-)

diff --git a/notebooks/pnCCD/Correct_pnCCD_NBC.ipynb b/notebooks/pnCCD/Correct_pnCCD_NBC.ipynb
index 9bbaf925b..cffad2c30 100644
--- a/notebooks/pnCCD/Correct_pnCCD_NBC.ipynb
+++ b/notebooks/pnCCD/Correct_pnCCD_NBC.ipynb
@@ -23,7 +23,7 @@
    "outputs": [],
    "source": [
     "in_folder = \"/gpfs/exfel/exp/SQS/202031/p900166/raw\" # input folder\n",
-    "out_folder = '/gpfs/exfel/data/scratch/setoodeh/Test' # output folder\n",
+    "out_folder = '/gpfs/exfel/data/scratch/ahmedk/test/remove' # output folder\n",
     "run = 347 # which run to read data from\n",
     "sequences = [-1] # sequences to correct, set to -1 for all, range allowed\n",
     "\n",
@@ -34,8 +34,8 @@
     "receiver_id = \"PNCCD_FMT-0\" # inset for receiver devices\n",
     "path_template = 'RAW-R{:04d}-{}-S{{:05d}}.h5' # the template to use to access data\n",
     "path_template_seqs = \"{}/r{:04d}/*PNCCD01-S*.h5\"\n",
-    "h5path = '/INSTRUMENT/{}/CAL/{}:output/data/' # path to data in the HDF5 file \n",
-    "h5path_ctrl = '/CONTROL/{}/CTRL/TCTRL'\n",
+    "h5path = '{}/CAL/{}:output' # Karabo source name of the image data\n",
+    "h5path_ctrl = '{}/CTRL/TCTRL'\n",
     "\n",
     "overwrite = True # keep this as True to not overwrite the output \n",
     "use_dir_creation_date = True # To obtain creation time of the run\n",
@@ -70,7 +70,12 @@
     "cti = False # Apply charge transfer inefficiency correction (not implemented, yet)\n",
     "do_pattern_classification = True # classify split events\n",
     "\n",
-    "saturated_threshold = 32000. # full well capacity in ADU"
+    "saturated_threshold = 32000. 
# full well capacity in ADU\n",
+    "\n",
+    "\n",
+    "def balance_sequences(in_folder, run, sequences, sequences_per_node, karabo_da):\n",
+    "    from xfel_calibrate.calibrate import balance_sequences as bs\n",
+    "    return bs(in_folder, run, sequences, sequences_per_node, karabo_da)"
    ]
   },
   {
@@ -116,6 +121,7 @@
     "warnings.filterwarnings('ignore')\n",
     "\n",
     "import h5py\n",
+    "from extra_data import H5File\n",
     "import matplotlib.pyplot as plt\n",
     "from iminuit import Minuit\n",
     "from IPython.display import Markdown, display\n",
@@ -128,6 +134,7 @@
     "    get_constant_from_db_and_time,\n",
     "    get_dir_creation_date,\n",
     "    get_random_db_interface,\n",
+    "    map_modules_from_folder,\n",
     ")\n",
+    "from cal_tools import h5_copy_except\n",
     "from iCalibrationDB import Conditions, ConstantMetaData, Constants, Detectors, Versions\n",
     "from iCalibrationDB.detectors import DetectorTypes\n",
@@ -142,11 +149,7 @@
     "from XFELDetAna import xfelpycaltools as xcal\n",
     "from XFELDetAna.plotting.util import prettyPlotting\n",
     "\n",
-    "prettyPlotting=True\n",
-    "\n",
-    "\n",
-    "if sequences[0] == -1:\n",
-    "    sequences = None"
+    "prettyPlotting = True"
    ]
   },
   {
@@ -157,6 +160,9 @@
    "source": [
     "# Calibration Database Settings, and Some Initial Run Parameters & Paths:\n",
     "\n",
+    "if sequences[0] == -1:\n",
+    "    sequences = None\n",
+    "\n",
     "display(Markdown('### Initial Settings and Paths'))\n",
     "pixels_x = 1024 # rows of pnCCD in pixels \n",
     "pixels_y = 1024 # columns of pnCCD in pixels\n",
@@ -165,7 +171,7 @@
     "\n",
     "proposal = list(filter(None, in_folder.strip('/').split('/')))[-2]\n",
     "file_loc =f'Proposal: {proposal}, Run: {run}'\n",
-    "print(f'Proposal: {proposal}, Run: {run}')\n",
+    "print(file_loc)\n",
     "\n",
     "# Paths to the data:\n",
     "ped_dir = \"{}/r{:04d}\".format(in_folder, run)\n",
@@ -189,10 +195,26 @@
     "if not creation_time and use_dir_creation_date:\n",
     "    creation_time = get_dir_creation_date(in_folder, run)\n",
     "\n",
-    "\n",
     "print(f\"Creation time: {creation_time}\")"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# set everything up filewise\n",
+    "mapped_files, _, total_sequences, _, _ = map_modules_from_folder(\n",
+    "    in_folder=in_folder,\n",
+    "    run=run,\n",
+    "    path_template='RAW-R{:04d}-{}-S{:05d}.h5',\n",
+    "    karabo_da=karabo_da,\n",
+    "    sequences=sequences,\n",
+    "    qm_naming=False,\n",
+    ")"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": null,
@@ -344,8 +366,8 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "display(Markdown('### List of Files to be Processed'))\n",
+    "display(Markdown('### List of files to be processed'))\n",
     "print(\"Reading data from the following files:\")\n",
     "for i in range(len(file_list)):\n",
     "    print(file_list[i])"
    ]
   },
@@ -645,36 +667,6 @@
    "Applying offset and common mode corrections to the raw data"
   ]
  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "ExecuteTime": {
-     "end_time": "2018-12-06T16:08:53.551111Z",
-     "start_time": "2018-12-06T16:08:53.531064Z"
-    }
-   },
-   "outputs": [],
-   "source": [
-    "def copy_and_sanitize_non_cal_data(infile: str, outfile: str, h5base: str):\n",
-    "    '''This function reads the .h5 data and writes the corrected .h5 data.'''\n",
-    "    if h5base.startswith(\"/\"):\n",
-    "        h5base = h5base[1:]\n",
-    "    dont_copy = ['image']\n",
-    "    dont_copy = [h5base+\"/{}\".format(do) for do in dont_copy]\n",
-    "\n",
-    "    def visitor(k, item):\n",
-    "        if k not in dont_copy:\n",
-    "            if isinstance(item, h5py.Group):\n",
-    "                outfile.create_group(k)\n",
-    "            elif isinstance(item, 
h5py.Dataset):\n",
-    "                group = str(k).split(\"/\")\n",
-    "                group = \"/\".join(group[:-1])\n",
-    "                infile.copy(k, outfile[group])\n",
-    "    \n",
-    "    infile.visititems(visitor)"
-   ]
-  },
   {
    "cell_type": "code",
    "execution_count": null,
@@ -694,156 +686,164 @@
     "    relGain = constants[\"RelativeGain\"]\n",
     "\n",
     "for k, f in enumerate(file_list):\n",
-    "    with h5py.File(f, 'r', driver='core') as infile:\n",
-    "        out_fileb = \"{}/{}\".format(out_folder, f.split(\"/\")[-1])\n",
-    "        out_file = out_fileb.replace(\"RAW\", \"CORR\")\n",
+    "    f_dc = H5File(f)\n",
+    "    out_fileb = f\"{out_folder}/{f.split('/')[-1]}\"\n",
+    "    out_file = out_fileb.replace(\"RAW\", \"CORR\")\n",
     "\n",
-    "        data = None\n",
-    "        noise = None\n",
-    "        \n",
-    "        try:\n",
-    "            with h5py.File(out_file, \"w\") as ofile:\n",
-    "                copy_and_sanitize_non_cal_data(infile, ofile, h5path)\n",
-    "                data = infile[h5path+\"/image\"][()]\n",
-    "                # Getting rid of empty frames:\n",
-    "                nzidx = np.count_nonzero(data, axis=(1, 2))\n",
-    "                data = data[nzidx != 0, ...]\n",
-    "                \n",
-    "                # If you want to analyze only a certain number of frames instead of all available good frames: \n",
-    "                if limit_images > 0:\n",
-    "                    data = data[:limit_images,...]\n",
-    "                \n",
-    "                # used for array shapes in the corrected data sets that we create and save in this loop:\n",
-    "                oshape = data.shape\n",
-    "                \n",
-    "                data = np.moveaxis(data, 0, 2)\n",
-    "                \n",
-    "                # data set to save offset corrected images:\n",
-    "                ddset = ofile.create_dataset(h5path+\"/pixels\",\n",
-    "                                             oshape,\n",
-    "                                             chunks=(chunk_size_idim, oshape[1], oshape[2]),\n",
-    "                                             dtype=np.float32)\n",
-    "                # data set to create bad pixels:\n",
-    "                ddsetm = ofile.create_dataset(h5path+\"/mask\",\n",
-    "                                              oshape,\n",
-    "                                              chunks=(chunk_size_idim, oshape[1], oshape[2]),\n",
-    "                                              dtype=np.uint32, compression=\"gzip\")\n",
-    "                \n",
-    "                data = data.astype(np.float32) \n",
-    "                \n",
-    "                # Creating maps for correction usage:\n",
-    "                offset = np.repeat(offsetMap[...,None], data.shape[2], axis=2)\n",
-    "                noise = np.repeat(noiseMap[...,None], data.shape[2], axis=2)\n",
-    "                bpix = np.repeat(badPixelMap[...,None], data.shape[2], axis=2)\n",
-    "                if corr_bools.get('relgain'):\n",
-    "                    rg = np.repeat(relGain[:,:,None], data.shape[2], axis=2) # rg = relative gain\n",
-    "                \n",
-    "                # non-corrected images for first sequence only:\n",
-    "                if k == 0:\n",
-    "                    uncor = np.append(uncor, data, axis=2)\n",
-    "                \n",
-    "                histCalRaw.fill(data) # filling histogram with raw uncorrected data\n",
-    "                \n",
-    "                # equating bad pixels' values to np.nan so that the pattern classifier ignores them:\n",
-    "                data[bpix != 0] = np.nan\n",
-    "                \n",
-    "                data -= offset # offset correction \n",
-    "                \n",
-    "                # Offset corrected images for first sequence only:\n",
-    "                if k == 0:\n",
-    "                    off_cor = np.append(off_cor, data, axis=2)\n",
-    "                histCalOffsetCor.fill(data) # filling histogram with offset corrected data\n",
     "\n",
-    "                ddset[...] = np.moveaxis(data, 2, 0)\n",
-    "                ddsetm[...] 
= np.moveaxis(bpix, 2, 0)\n", - " ofile.flush()\n", - "\n", - " # cm: common mode\n", - " if corr_bools.get('common_mode'):\n", - " \n", - " # data set to save common mode corrected images:\n", - " ddsetcm = ofile.create_dataset(h5path+\"/pixels_cm\",\n", - " oshape,\n", - " chunks=(chunk_size_idim, oshape[1], oshape[2]),\n", - " dtype=np.float32)\n", - " \n", - " # common mode correction:\n", - " data = cmCorrection.correct(data.astype(np.float32), # common mode correction\n", - " cellTable=np.zeros(data.shape[2], np.int32)) \n", - " \n", - " # discarding events caused by saturated pixels:\n", - " # we equate these values to np.nan so that the pattern classifier ignores them:\n", - " data[data >= saturated_threshold] = np.nan \n", - " histCalCommonModeCor.fill(data) # filling histogram with common mode corrected data\n", - " # common mode corrected images for first sequence only:\n", - " if k == 0:\n", - " cm_cor = np.append(cm_cor, data, axis=2)\n", - " \n", - " ddsetcm[...] = np.moveaxis(data, 2, 0)\n", - " \n", - " if corr_bools.get('relgain'):\n", - " # data set to save gain corrected images:\n", - " ddsetg = ofile.create_dataset(h5path+\"/gain\",\n", - " oshape,\n", - " chunks=(chunk_size_idim, oshape[1], oshape[2]),\n", - " dtype=np.float32, compression=\"gzip\")\n", - " \n", - " data /= rg # relative gain correction \n", - " histCalGainCor.fill(data) # filling histogram with gain corrected data\n", - " # gain corrected images for first sequence only:\n", - " if k == 0:\n", - " g_cor = np.append(g_cor, data, axis=2)\n", - " ddsetg[...] = np.moveaxis(rg, 2, 0).astype(np.float32)\n", - "\n", - " if corr_bools.get('pattern_class'):\n", - " # data set to save split event corrected images:\n", - " # c: classifications, p: even patterns\n", - " ddsetc = ofile.create_dataset(h5path+\"/pixels_classified\",\n", - " oshape,\n", - " chunks=(chunk_size_idim, oshape[1], oshape[2]),\n", - " dtype=np.float32, compression=\"gzip\")\n", - " # data set to save different valid patterns:\n", - " ddsetp = ofile.create_dataset(h5path+\"/patterns\",\n", - " oshape,\n", - " chunks=(chunk_size_idim, oshape[1], oshape[2]),\n", - " dtype=np.int32, compression=\"gzip\")\n", - " \n", - " # The calculation of the cluster map:\n", - " patternClassifierLH._noisemap = noise[:, :pixels_x//2, :]\n", - " patternClassifierRH._noisemap = noise[:, pixels_x//2:, :]\n", - "\n", - " # Dividing the data into left and right hemispheres:\n", - " dataLH = data[:, :pixels_x//2, :]\n", - " dataRH = data[:, pixels_x//2:, :]\n", - " \n", - " # pattern classification on corrected data\n", - " dataLH, patternsLH = patternClassifierLH.classify(dataLH) \n", - " dataRH, patternsRH = patternClassifierRH.classify(dataRH)\n", - "\n", - " data[:, :pixels_x//2, :] = dataLH\n", - " data[:, pixels_x//2:, :] = dataRH\n", - "\n", - " patterns = np.zeros(data.shape, patternsLH.dtype)\n", - " patterns[:, :pixels_x//2, :] = patternsLH\n", - " patterns[:, pixels_x//2:, :] = patternsRH\n", - "\n", - " data[data < split_evt_primary_threshold*noise] = 0\n", - " ddsetc[...] = np.moveaxis(data, 2, 0)\n", - " ddsetp[...] 
= np.moveaxis(patterns, 2, 0)\n",
-    "\n",
-    "                histCalPcorr.fill(data) # filling histogram with split events corrected data\n",
-    "                data[patterns != 100] = np.nan # Discard doubles, triples, quadruple, clusters, first singles\n",
-    "                histCalPcorrS.fill(data) # filling histogram with singles events data\n",
-    "                \n",
-    "                # split event corrected images for first sequence only (also these events are only \n",
-    "                # singles events):\n",
-    "                if k == 0:\n",
-    "                    final_cor = np.append(final_cor, data, axis=2)\n",
-    "        \n",
-    "        except Exception as e:\n",
-    "            print(f\"Couldn't calibrate data in {f}: {e}\\n\")\n",
+    "    data = None\n",
+    "    noise = None\n",
+    "\n",
+    "    try:\n",
+    "        with h5py.File(out_file, 'w') as ofile:\n",
+    "            # Copy RAW non-calibrated sources.\n",
+    "            with h5py.File(f, 'r') as sfile:\n",
+    "                h5_copy_except.h5_copy_except_paths(\n",
+    "                    sfile, ofile,\n",
+    "                    [\"INSTRUMENT/\"+h5path+\"/data/image\"])\n",
+    "\n",
+    "            n_imgs = f_dc[h5path][\"data.image\"].shape[0]\n",
+    "            # If you want to analyze only a certain number of frames\n",
+    "            # instead of all available good frames:\n",
+    "            if limit_images > 0:\n",
+    "                n_imgs = limit_images\n",
+    "\n",
+    "            data_dc = f_dc.select(\n",
+    "                h5path, \"data.image\", require_all=True).select_trains(np.s_[:n_imgs])\n",
+    "            data = data_dc[h5path, \"data.image\"].ndarray()\n",
+    "\n",
+    "            # used for array shapes in the corrected data sets created in this loop\n",
+    "            # (require_all and limit_images may both reduce the frame count):\n",
+    "            oshape = data.shape\n",
+    "\n",
+    "            data = np.moveaxis(data, 0, 2)\n",
+    "\n",
+    "            # data set to save offset corrected images:\n",
+    "            ddset = ofile.create_dataset(\n",
+    "                h5path+\"/pixels\",\n",
+    "                oshape,\n",
+    "                chunks=(chunk_size_idim, oshape[1], oshape[2]),\n",
+    "                dtype=np.float32)\n",
+    "\n",
+    "            # data set to save the bad pixel mask:\n",
+    "            ddsetm = ofile.create_dataset(\n",
+    "                h5path+\"/mask\",\n",
+    "                oshape,\n",
+    "                chunks=(chunk_size_idim, oshape[1], oshape[2]),\n",
+    "                dtype=np.uint32, compression=\"gzip\")\n",
+    "\n",
+    "            data = data.astype(np.float32)\n",
+    "\n",
+    "            # Creating maps for correction usage:\n",
+    "            offset = np.repeat(offsetMap[...,None], data.shape[2], axis=2)\n",
+    "            noise = np.repeat(noiseMap[...,None], data.shape[2], axis=2)\n",
+    "            bpix = np.repeat(badPixelMap[...,None], data.shape[2], axis=2)\n",
+    "            if corr_bools.get('relgain'):\n",
+    "                rg = np.repeat(relGain[:,:,None], data.shape[2], axis=2) # rg = relative gain\n",
+    "\n",
+    "            # non-corrected images for first sequence only:\n",
+    "            if k == 0:\n",
+    "                uncor = np.append(uncor, data, axis=2)\n",
+    "\n",
+    "            histCalRaw.fill(data) # filling histogram with raw uncorrected data\n",
+    "\n",
+    "            # equating bad pixels' values to np.nan so that the pattern classifier ignores them:\n",
+    "            data[bpix != 0] = np.nan\n",
+    "\n",
+    "            data -= offset # offset correction \n",
+    "\n",
+    "            # Offset corrected images for first sequence only:\n",
+    "            if k == 0:\n",
+    "                off_cor = np.append(off_cor, data, axis=2)\n",
+    "            histCalOffsetCor.fill(data) # filling histogram with offset corrected data\n",
     "\n",
+    "            ddset[...] = np.moveaxis(data, 2, 0)\n",
+    "            ddsetm[...] 
= np.moveaxis(bpix, 2, 0)\n", + " ofile.flush()\n", + "\n", + " # cm: common mode\n", + " if corr_bools.get('common_mode'):\n", + "\n", + " # data set to save common mode corrected images:\n", + " ddsetcm = ofile.create_dataset(h5path+\"/pixels_cm\",\n", + " oshape,\n", + " chunks=(chunk_size_idim, oshape[1], oshape[2]),\n", + " dtype=np.float32)\n", + "\n", + " # common mode correction:\n", + " data = cmCorrection.correct(data.astype(np.float32), # common mode correction\n", + " cellTable=np.zeros(data.shape[2], np.int32)) \n", + "\n", + " # discarding events caused by saturated pixels:\n", + " # we equate these values to np.nan so that the pattern classifier ignores them:\n", + " data[data >= saturated_threshold] = np.nan \n", + " histCalCommonModeCor.fill(data) # filling histogram with common mode corrected data\n", + " # common mode corrected images for first sequence only:\n", + " if k == 0:\n", + " cm_cor = np.append(cm_cor, data, axis=2)\n", + "\n", + " ddsetcm[...] = np.moveaxis(data, 2, 0)\n", + "\n", + " if corr_bools.get('relgain'):\n", + " # data set to save gain corrected images:\n", + " ddsetg = ofile.create_dataset(h5path+\"/gain\",\n", + " oshape,\n", + " chunks=(chunk_size_idim, oshape[1], oshape[2]),\n", + " dtype=np.float32, compression=\"gzip\")\n", + "\n", + " data /= rg # relative gain correction \n", + " histCalGainCor.fill(data) # filling histogram with gain corrected data\n", + " # gain corrected images for first sequence only:\n", + " if k == 0:\n", + " g_cor = np.append(g_cor, data, axis=2)\n", + " ddsetg[...] = np.moveaxis(rg, 2, 0).astype(np.float32)\n", + "\n", + " if corr_bools.get('pattern_class'):\n", + " # data set to save split event corrected images:\n", + " # c: classifications, p: even patterns\n", + " ddsetc = ofile.create_dataset(h5path+\"/pixels_classified\",\n", + " oshape,\n", + " chunks=(chunk_size_idim, oshape[1], oshape[2]),\n", + " dtype=np.float32, compression=\"gzip\")\n", + " # data set to save different valid patterns:\n", + " ddsetp = ofile.create_dataset(h5path+\"/patterns\",\n", + " oshape,\n", + " chunks=(chunk_size_idim, oshape[1], oshape[2]),\n", + " dtype=np.int32, compression=\"gzip\")\n", + "\n", + " # The calculation of the cluster map:\n", + " patternClassifierLH._noisemap = noise[:, :pixels_x//2, :]\n", + " patternClassifierRH._noisemap = noise[:, pixels_x//2:, :]\n", + "\n", + " # Dividing the data into left and right hemispheres:\n", + " dataLH = data[:, :pixels_x//2, :]\n", + " dataRH = data[:, pixels_x//2:, :]\n", + "\n", + " # pattern classification on corrected data\n", + " dataLH, patternsLH = patternClassifierLH.classify(dataLH) \n", + " dataRH, patternsRH = patternClassifierRH.classify(dataRH)\n", + "\n", + " data[:, :pixels_x//2, :] = dataLH\n", + " data[:, pixels_x//2:, :] = dataRH\n", + "\n", + " patterns = np.zeros(data.shape, patternsLH.dtype)\n", + " patterns[:, :pixels_x//2, :] = patternsLH\n", + " patterns[:, pixels_x//2:, :] = patternsRH\n", + "\n", + " data[data < split_evt_primary_threshold*noise] = 0\n", + " ddsetc[...] = np.moveaxis(data, 2, 0)\n", + " ddsetp[...] 
= np.moveaxis(patterns, 2, 0)\n",
+    "\n",
+    "                histCalPcorr.fill(data) # filling histogram with split events corrected data\n",
+    "                data[patterns != 100] = np.nan # Discard doubles, triples, quadruple, clusters, first singles\n",
+    "                histCalPcorrS.fill(data) # filling histogram with singles events data\n",
+    "\n",
+    "                # split event corrected images for first sequence only (also these events are only \n",
+    "                # singles events):\n",
+    "                if k == 0:\n",
+    "                    final_cor = np.append(final_cor, data, axis=2)\n",
+    "\n",
+    "    except Exception as e:\n",
+    "        print(f\"Couldn't calibrate data in {f}: {e}\\n\")\n",
     "\n",
     "print(\"In addition to offset correction, the following corrections were performed:\")\n",
     "for k, v in corr_bools.items():\n",
     "    if v:\n",
     "        print(k)"
    ]
   },
@@ -1286,35 +1286,35 @@
     "    with h5py.File(\"{}/CORR-R{:04d}-PNCCD01-S{:05d}.h5\".format(out_folder, run, sequences[seq_num]), 'r', \n",
     "                   driver='core') as infile:\n",
     "\n",
-    "            data = infile[h5path+\"/pixels_classified\"][()].astype(np.float32) # classifications\n",
-    "            patterns = infile[h5path+\"/patterns\"][()].astype(np.float32) # event patterns\n",
-    "            \n",
-    "            # events' patterns indices are as follows: 100 (singles), 101 (first singles), 200 - 203 (doubles),\n",
-    "            # 300 - 303 (triples), and 400 - 403 (quadruples). Note that for the last three types of patterns, \n",
-    "            # there are left, right, up, and down indices.\n",
-    "\n",
-    "            # Separating the events:\n",
-    "            # Singles and First Singles:\n",
-    "            for s in range(100, 102):\n",
-    "                single = copy.copy(data[...])\n",
-    "                single[patterns != s] = np.nan\n",
-    "                singles.append(single)\n",
-    "            \n",
-    "\n",
-    "            for d in range(200, 204):\n",
-    "                double = copy.copy(data[...])\n",
-    "                double[patterns != d] = np.nan\n",
-    "                doubles.append(double)\n",
-    "\n",
-    "            for t in range(300, 304):\n",
-    "                triple = copy.copy(data[...])\n",
-    "                triple[patterns != t] = np.nan\n",
-    "                triples.append(triple) \n",
-    "\n",
-    "            for q in range(400, 404):\n",
-    "                quad = copy.copy(data[...])\n",
-    "                quad[patterns != q] = np.nan\n",
-    "                quads.append(quad)"
+    "        data = infile[h5path+\"/pixels_classified\"][()].astype(np.float32) # classifications\n",
+    "        patterns = infile[h5path+\"/patterns\"][()].astype(np.float32) # event patterns\n",
+    "\n",
+    "        # events' patterns indices are as follows: 100 (singles), 101 (first singles), 200 - 203 (doubles),\n",
+    "        # 300 - 303 (triples), and 400 - 403 (quadruples). 
Note that for the last three types of patterns, \n", + " # there are left, right, up, and down indices.\n", + "\n", + " # Separating the events:\n", + " # Singles and First Singles:\n", + " for s in range(100, 102):\n", + " single = copy.copy(data[...])\n", + " single[patterns != s] = np.nan\n", + " singles.append(single)\n", + "\n", + "\n", + " for d in range(200, 204):\n", + " double = copy.copy(data[...])\n", + " double[patterns != d] = np.nan\n", + " doubles.append(double)\n", + "\n", + " for t in range(300, 304):\n", + " triple = copy.copy(data[...])\n", + " triple[patterns != t] = np.nan\n", + " triples.append(triple) \n", + "\n", + " for q in range(400, 404):\n", + " quad = copy.copy(data[...])\n", + " quad[patterns != q] = np.nan\n", + " quads.append(quad)" ] }, { @@ -1330,7 +1330,6 @@ " hs, e = np.histogram(single.flatten(), bins=event_bins, range=b_range) # h: histogram counts, e: bin edges\n", " h += hs\n", " hA += hs # hA: counts all events (see below)\n", - "\n", " \n", " # bin edges array has one extra element => need to plot from 0 to the one before the last element to have the \n", " # same size as h-array => in what follows, we use e[:-1] (-1 means one before the last element)\n", -- GitLab
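
For reference, the new read path introduced in this patch reduces to the short sketch below. This is a minimal illustration under stated assumptions, not part of the patch itself: the source name and file name are assumed example values (the notebook builds them from karabo_id, receiver_id, and path_template), and pnCCD records one frame per train, so selecting trains caps the number of frames. The require_all=True selection plays the role of the old np.count_nonzero empty-frame filter by keeping only trains that actually carry image data.

    import numpy as np
    from extra_data import H5File

    # Assumed example values; the notebook derives these from its parameters.
    h5path = "SQS_NQS_PNCCD1MP/CAL/PNCCD_FMT-0:output"  # formatted source name
    raw_file = "RAW-R0347-PNCCD01-S00000.h5"            # one raw sequence file

    f_dc = H5File(raw_file)

    # Keep only trains that carry an image, then cap the frame count.
    n_imgs = 500
    data_dc = f_dc.select(
        h5path, "data.image", require_all=True).select_trains(np.s_[:n_imgs])

    data = data_dc[h5path, "data.image"].ndarray()  # (frames, 1024, 1024)
    data = np.moveaxis(data, 0, 2)                  # (y, x, frames), as the correction loop expects

The non-image sources are still copied to the output file with h5py: h5_copy_except.h5_copy_except_paths walks the input HDF5 file and copies everything except the listed paths, replacing the removed copy_and_sanitize_non_cal_data visitor.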