diff --git a/notebooks/LPD/LPDChar_Darks_NBC.ipynb b/notebooks/LPD/LPDChar_Darks_NBC.ipynb
index 3bdc0114f0e8d5faba892ff80eae39f8f7c294f7..602cc793df6b7237b53b2f15e24c39dbb3d81c1f 100644
--- a/notebooks/LPD/LPDChar_Darks_NBC.ipynb
+++ b/notebooks/LPD/LPDChar_Darks_NBC.ipynb
@@ -25,7 +25,6 @@
     "in_folder = \"/gpfs/exfel/exp/FXE/202304/p003338/raw\" # path to input data, required\n",
     "out_folder = \"/gpfs/exfel/data/scratch/kluyvert/lpd-darks-p3338-r133-134-135/\" # path to output to, required\n",
     "metadata_folder = \"\" # Directory containing calibration_metadata.yml when run by xfel-calibrate\n",
-    "sequence = 0 # sequence files to evaluate\n",
     "modules = [-1] # list of modules to evaluate, RANGE ALLOWED\n",
     "run_high = 133 # run number in which high gain data was recorded, required\n",
     "run_med = 134 # run number in which medium gain data was recorded, required\n",
@@ -33,10 +32,7 @@
     "\n",
     "karabo_id = \"FXE_DET_LPD1M-1\" # karabo id\n",
     "karabo_da = ['-1'] # a list of data aggregator names, Default [-1] for selecting all data aggregators\n",
-    "receiver_id = \"{}CH0\" # inset for receiver devices\n",
-    "path_template = 'RAW-R{:04d}-{}-S{:05d}.h5' # the template to use to access data\n",
-    "h5path = '/INSTRUMENT/{}/DET/{}:xtdf/image' # path in the HDF5 file to images\n",
-    "h5path_idx = '/INDEX/{}/DET/{}:xtdf/image' # path in the HDF5 file to images\n",
+    "source_name = \"{}/DET/{}CH0:xtdf\" # Source name for raw detector data - filled with karabo_id & module number\n",
     "\n",
     "use_dir_creation_date = True # use the creation date of the directory for database time derivation\n",
     "cal_db_interface = \"tcp://max-exfl016:8015#8025\" # the database interface to use\n",
@@ -51,13 +47,14 @@
     "thresholds_offset_hard = [400, 1500] # bad pixel hard threshold\n",
     "thresholds_noise_sigma = 7. # bad pixel relative threshold in terms of n sigma noise\n",
     "thresholds_noise_hard = [1, 35] # bad pixel hard threshold\n",
-    "skip_first_ntrains = 10 # Number of first trains to skip\n",
+    "\n",
+    "ntrains = 500 # maximum number of trains to use in each gain stage\n",
+    "skip_first_ntrains = 10 # Number of first trains to skip\n",
+    "min_trains = 370 # minimum number of trains needed for each gain stage\n",
     "\n",
     "# Parameters for plotting\n",
     "skip_plots = False # exit after writing corrected files\n",
     "\n",
-    "instrument = \"FXE\" # instrument name\n",
-    "ntrains = 100 # number of trains to use\n",
     "high_res_badpix_3d = False # plot bad-pixel summary in high resolution\n",
     "test_for_normality = False # perform normality test\n",
     "inject_cell_order = False # Include memory cell order as part of the detector condition\n",
@@ -81,7 +78,6 @@
     "warnings.filterwarnings('ignore')\n",
     "\n",
     "import dateutil.parser\n",
-    "import h5py\n",
     "import matplotlib\n",
     "import pasha as psh\n",
     "import scipy.stats\n",
@@ -98,6 +94,7 @@
     "from iCalibrationDB import Conditions, Constants, Detectors, Versions\n",
     "from XFELDetAna.plotting.heatmap import heatmapPlot\n",
     "from XFELDetAna.plotting.simpleplot import simplePlot\n",
+    "from extra_data import RunDirectory\n",
     "\n",
     "from cal_tools.enums import BadPixels\n",
     "from cal_tools.plotting import (\n",
@@ -116,6 +113,7 @@
     "    map_gain_stages,\n",
     "    module_index_to_qm,\n",
     "    parse_runs,\n",
+    "    reorder_axes,\n",
     "    run_prop_seq_from_path,\n",
     "    save_const_to_h5,\n",
     "    send_to_db,\n",
@@ -140,17 +138,8 @@
     "else:\n",
     "    modules = [int(x[-2:]) for x in karabo_da]\n",
     "\n",
-    "gain_runs = {\n",
-    "    \"high\": run_high,\n",
-    "    \"med\": run_med,\n",
-    "    \"low\": run_low,\n",
-    "}\n",
-    "\n",
     "capacitor_setting_s = f'{capacitor_setting}pf'\n",
     "\n",
-    "h5path = h5path.format(karabo_id, receiver_id)\n",
-    "h5path_idx = h5path_idx.format(karabo_id, receiver_id)\n",
-    "\n",
     "creation_time = None\n",
     "if use_dir_creation_date:\n",
     "    creation_time = get_dir_creation_date(in_folder, run_high)\n",
@@ -165,7 +154,6 @@
     "print(\"Proposal: {}\".format(prop))\n",
     "print(\"Memory cells: {}/{}\".format(mem_cells, max_cells))\n",
     "print(\"Runs: {}, {}, {}\".format(run_high, run_med, run_low))\n",
-    "print(\"Sequence: {}\".format(sequence))\n",
     "print(\"Using DB: {}\".format(db_output))\n",
     "print(\"Input: {}\".format(in_folder))\n",
     "print(\"Output: {}\".format(out_folder))\n",
@@ -173,18 +161,6 @@
     "print(f\"Capacitor setting: {capacitor_setting}pf\")"
    ]
   },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# set everything up filewise\n",
-    "gmf = map_gain_stages(in_folder, gain_runs, path_template, karabo_da, [sequence])\n",
-    "gain_mapped_files, total_sequences, total_file_size = gmf\n",
-    "print(f\"Will process a total of {total_sequences} files.\")"
-   ]
-  },
   {
    "cell_type": "markdown",
    "metadata": {},
@@ -202,41 +178,35 @@
     "parallel_num_threads = multiprocessing.cpu_count() // parallel_num_procs\n",
     "\n",
     "# the actual characterization\n",
-    "def characterize_module(filename, channel, gg):\n",
-    "\n",
-    "    def splitOffGainLPD(d):\n",
-    "        msk = np.zeros(d.shape, np.uint16)\n",
-    "        msk[...] = 0b0000111111111111\n",
-    "        data = np.bitwise_and(d, msk)\n",
-    "        msk[...] = 0b0011000000000000\n",
-    "        gain = np.bitwise_and(d, msk)//4096\n",
-    "        gain[gain > 2] = 2\n",
-    "        return data, gain\n",
-    "\n",
-    "    infile = h5py.File(filename, \"r\")\n",
+    "def characterize_module(run_path, channel, gg):\n",
+    "    run = RunDirectory(run_path)\n",
+    "    det_source = source_name.format(karabo_id, channel)\n",
+    "    data = run[det_source, 'image.data'].drop_empty_trains()\n",
+    "    data = data[skip_first_ntrains : skip_first_ntrains + ntrains]\n",
+    "    cell_ids = run[det_source, 'image.cellId'].drop_empty_trains()\n",
+    "    cell_ids = cell_ids[skip_first_ntrains : skip_first_ntrains + ntrains]\n",
     "    \n",
-    "    instrument_src = h5path.format(channel)\n",
-    "    index_src = h5path_idx.format(channel)\n",
-    "    count = infile[f\"{index_src}/count\"][()]\n",
-    "    first = infile[f\"{index_src}/first\"][()]\n",
-    "    valid = count != 0\n",
-    "    count, first = count[valid], first[valid]\n",
-    "    first_image = int(first[skip_first_ntrains] if first.shape[0] > skip_first_ntrains else 0)\n",
-    "    last_image = int(first_image + np.sum(count[skip_first_ntrains:skip_first_ntrains+ntrains]))\n",
-    "\n",
-    "    im = np.array(infile[\"{}/data\".format(instrument_src, channel)][first_image:last_image, ...])\n",
-    "    cellid = np.squeeze(np.array(infile[\"{}/cellId\".format(instrument_src, channel)][first_image:last_image, ...]))\n",
-    "    infile.close()\n",
-    "    if im.shape[0] == 0: # No data\n",
-    "        return None, None, channel, gg, None, None, None, None\n",
-    "\n",
-    "    cellid_pattern = cellid[:count[0]]\n",
-    "\n",
-    "    im, g = splitOffGainLPD(im[:, 0, ...])\n",
-    "    im = im.astype(np.float32)\n",
+    "    if len(data.train_ids) < min_trains:\n",
+    "        raise Exception(f\"Run {run_path} only contains {len(data.train_ids)} trains, but {min_trains} required\")\n",
+    "\n",
+    "    im = data.ndarray()\n",
+    "    if im.ndim > 3:\n",
+    "        im = im[:, 0]  # Drop extra dimension\n",
+    "    \n",
+    "    cellid = cell_ids.ndarray()\n",
+    "    cellid_pattern = cell_ids[0].ndarray()\n",
+    "    if cellid.ndim > 1:\n",
+    "        cellid = cellid[:, 0]\n",
+    "        cellid_pattern = cellid_pattern[:, 0]\n",
+    "\n",
+    "    # Mask off gain bits, leaving only data\n",
+    "    im &= 0b0000111111111111\n",
     "\n",
-    "    im = np.rollaxis(im, 2)\n",
-    "    im = np.rollaxis(im, 2, 1)\n",
+    "    im = im.astype(np.float32)\n",
+    "    im = reorder_axes(im,\n",
+    "        from_order=('frames', 'slow_scan', 'fast_scan'),\n",
+    "        to_order=('fast_scan', 'slow_scan', 'frames'),\n",
+    "    )\n",
     "\n",
     "    context = psh.context.ThreadContext(num_workers=parallel_num_threads)\n",
     "    offset = context.alloc(shape=(im.shape[0], im.shape[1], max_cells), dtype=np.float64)\n",
@@ -295,19 +265,12 @@
     "# Should be the same cell order for all modules & all gain stages\n",
     "cellid_patterns_g = {}\n",
     "\n",
-    "gg = 0\n",
-    "inp = []\n",
     "\n",
-    "# This loop needs to get runs in the order high, medium, low\n",
-    "for gain, mapped_files in gain_mapped_files.items():\n",
+    "inp = []\n",
+    "for gg, run_num in enumerate([run_high, run_med, run_low]):\n",
+    "    run_path = os.path.join(in_folder, f\"r{run_num:04d}\")\n",
     "    for i in modules:\n",
-    "        qm = module_index_to_qm(i)\n",
-    "        if qm in mapped_files and not mapped_files[qm].empty():\n",
-    "            fname_in = mapped_files[qm].get()\n",
-    "            print(\"Process file: \", fname_in)\n",
-    "            inp.append((fname_in, i, gg))\n",
-    "\n",
-    "    gg+=1\n",
+    "        inp.append((run_path, i, gg))\n",
     "\n",
     "with multiprocessing.Pool(processes=parallel_num_procs) as pool:\n",
     "    results = pool.starmap(characterize_module, inp)\n",
@@ -321,6 +284,7 @@
     "    qm = module_index_to_qm(i)\n",
     "    if qm not in offset_g:\n",
     "        offset_g[qm] = np.zeros(offset.shape[:3] + (3,))\n",
+    "        print(\"Constant shape:\", offset_g[qm].shape)\n",
     "        noise_g[qm] = np.zeros_like(offset_g[qm])\n",
     "        badpix_g[qm] = np.zeros_like(offset_g[qm], dtype=np.uint32)\n",
     "        data_g[qm] = np.full((ntrains, 3), np.nan)\n",
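
Reviewer note: the rewritten `characterize_module` swaps the manual HDF5 index arithmetic (the `first`/`count` bookkeeping done with h5py) for train-based selection with extra_data. Below is a minimal sketch of that loading pattern, not code from this PR; it assumes module 0 of `FXE_DET_LPD1M-1` and the notebook's default `in_folder`/`run_high` parameter values.

```python
from extra_data import RunDirectory

# Open the high-gain dark run (run_high = 133 under the default in_folder).
run = RunDirectory("/gpfs/exfel/exp/FXE/202304/p003338/raw/r0133")
# What source_name.format(karabo_id, channel) yields for module 0.
det_source = "FXE_DET_LPD1M-1/DET/0CH0:xtdf"

# KeyData selection is lazy: discard trains with no image frames, then
# slice by train index; nothing is read from disk until .ndarray().
data = run[det_source, "image.data"].drop_empty_trains()
data = data[10 : 10 + 500]  # skip_first_ntrains = 10, ntrains = 500
im = data.ndarray()

# Each raw LPD pixel word packs 12 ADC bits plus 2 gain bits (cf. the masks
# in the removed splitOffGainLPD); dark characterization keeps only the ADC bits.
im &= 0b0000111111111111
```

Compared with the old per-sequence-file mapping, selecting trains at the run level also makes the data requirement explicit: if fewer than `min_trains` trains survive `drop_empty_trains()`, the new code raises immediately rather than deriving constants from too few dark frames.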