diff --git a/notebooks/Jungfrau/Jungfrau_Create_Fit_Spectra_Histos_NBC.ipynb b/notebooks/Jungfrau/Jungfrau_Create_Fit_Spectra_Histos_NBC.ipynb index e34b36a18d2f2a2f73e5f0b5b54b39a835e1d76a..878f94dd44e3bde82d12860550b35250bc8e4f6d 100644 --- a/notebooks/Jungfrau/Jungfrau_Create_Fit_Spectra_Histos_NBC.ipynb +++ b/notebooks/Jungfrau/Jungfrau_Create_Fit_Spectra_Histos_NBC.ipynb @@ -4,13 +4,24 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Histogram of single photon spectra\n", + "# Jungfrau Detector Flatfield Gain Calibration - Part 1: Histogram Creation and Fitting\n", "\n", "Author: European XFEL Detector Group, Version: 1.0\n", "\n", - "Creates histograms from flat fields and store it in HDF5 files then perform fitting based on the selecting `fit_func`.\n", + "This notebook performs the first part of the Jungfrau detector flatfield gain calibration process.\n", "\n", - "Histograms are on a pixel-by-pixel, memory cell by memory cell basis to save memory and space, histogram range are to be optimized around the desired peak." + "#### Overview\n", + "\n", + "1. Load Raw Data: Read detector data for the specified run. Currently only a single run is supported.\n", + "2. Initialize Histograms: Create empty histograms for each module and memory cell\n", + "3. Offset Correction: Apply offset subtraction to raw data\n", + "4. Histogram Population: Fill histograms with offset-corrected data\n", + "5. Histogram Fitting: Apply the selected fit function (Charge Sharing, Double Charge Sharing, or Gaussian)\n", + "6. Save Intermediate Results:\n", + "    - Store histogram data as HDF5 files\n", + "    - Save fit results (gain map, sigma map, chi-square map, alpha map) as HDF5 files\n", + "\n", + "Note: Results from this notebook are used later for Gain Map and BadPixelsFF Generation." ] }, { @@ -49,8 +60,8 @@ "# parameters for the peak finder\n", "n_sigma = 5. # n of sigma above pedestal threshold\n", "ratio = 0.99 # ratio of the next peak amplitude in the peak_finder\n", - "rebin = 1\n", - "\n", + "rebin_factor = 1 # Number of adjacent bins to combine before fitting the histogram.\n", + "initial_sigma = 15. 
# initial sigma used when fitting the histogram\n", "\n", "def find_das(in_folder, runs, karabo_da):\n", " run = runs[0]\n", @@ -88,7 +99,10 @@ "from cal_tools.jungfrau import jungfrau_ff\n", "from cal_tools.jungfrau.jungfraulib import JungfrauCtrl\n", "from cal_tools.step_timing import StepTimer\n", - "from cal_tools.tools import calcat_creation_time" + "from cal_tools.tools import (\n", + " calcat_creation_time,\n", + " write_constants_fragment,\n", + ")" ] }, { @@ -110,7 +124,6 @@ "metadata": {}, "outputs": [], "source": [ - "begin_stuff = time.localtime()\n", "control_src = control_src_template.format(karabo_id, karabo_da[0])\n", "first_run_folder = in_folder / f'r{runs[0]:04d}'\n", "ctrl_data = JungfrauCtrl(RunDirectory(first_run_folder), control_src)\n", @@ -253,7 +266,12 @@ "jf_metadata = jf_cal.metadata(\n", " calibrations=[\"Offset10Hz\", \"Noise10Hz\"])\n", "\n", - "# TODO: display CCV timestamp\n", + "# Record constant details in YAML metadata\n", + "write_constants_fragment(\n", + " out_folder=(metadata_folder or out_folder),\n", + " det_metadata=jf_metadata,\n", + " caldb_root=jf_cal.caldb_root)\n", + "\n", "const_data = jf_cal.ndarray_map(metadata=jf_metadata)\n", "jf_cal.display_markdown_retrieved_constants(metadata=jf_metadata)\n", "\n", @@ -299,7 +317,7 @@ "outputs": [], "source": [ "h_n_bins = int((h_range[1] - h_range[0])/h_bins_s)\n", - "h_bins = np.linspace(h_range[0], h_range[1], h_n_bins)\n", + "h_bins = np.linspace(h_range[0], h_range[1], h_n_bins, dtype=np.float32)\n", "\n", "h_spectra = dict()\n", "edges = dict()\n", @@ -324,7 +342,7 @@ " # Jungfrau gains 0[00], 1[01], 3[11]\n", " # Change low gain to 2 for indexing purposes.\n", " g[g==3] = 2\n", - "\n", + " m = 0\n", " # Select memory cells\n", " if memory_cells > 1:\n", " \"\"\"\n", @@ -341,7 +359,7 @@ " m[m==255] = 0\n", " offset_map_cell = offset_map[:, m, ...] # [16 + empty cell, x, y]\n", " else:\n", - " offset_map_cell = offset_map[:, 0, ...]\n", + " offset_map_cell = offset_map[:, m, ...]\n", "\n", " # Offset correction\n", " offset = np.choose(g, offset_map_cell)\n", @@ -349,8 +367,7 @@ " d -= offset\n", " # Sort correct data based on memory cell order.\n", " sort_indices = np.argsort(m)\n", - " data_corr[index, ...] = d[sort_indices, ...]\n", - " " + " data_corr[index, ...] 
= d[sort_indices, ...]" ] }, { @@ -377,13 +394,13 @@ " offset_map = const_data[da][\"Offset10Hz\"]\n", " run_folder = in_folder / f'r{run:04d}'\n", " ## Offset subtraction & Histogram filling\n", - " ### performs offset subtraction andchunk_trains fills the histogram\n", + " ### performs offset subtraction and fills the histograms in chunks of trains\n", " ### looping on individual files because single photon runs can be very large\n", - " for dc_chunk in RunDirectory(\n", - " run_folder, include=f\"*{da}*\"\n", - " ).split_trains(trains_per_part=chunked_trains):\n", + " chunks_list = list(RunDirectory(\n", + " run_folder, include=f\"*{da}*\").split_trains(trains_per_part=chunked_trains))\n", + " print(f\"Processing raw data and filling histogram in {len(chunks_list)} chunks\")\n", "\n", - " trains = dc_chunk.get_array(det_src, 'data.trainId')\n", + " for dc_chunk in chunks_list:\n", " memcells = dc_chunk.get_array(\n", " det_src,\n", " 'data.memoryCell',\n", @@ -392,32 +409,29 @@ " adc = dc_chunk.get_array(det_src, 'data.adc', extra_dims=extra_dims).astype(np.float32)\n", " gain = dc_chunk.get_array(det_src, 'data.gain', extra_dims=extra_dims)\n", "\n", - " # gain = gain.where(gain < 2, other = 2).astype(np.int16)\n", " step_timer.start()\n", - "\n", + " # gain = gain.where(gain < 2, other = 2).astype(np.int16)\n", " # Allocate shared arrays for corrected data. Used in `correct_train()`\n", " data_corr = context.alloc(shape=adc.shape, dtype=np.float32)\n", " context.map(correct_train, adc)\n", - " step_timer.done_step(\"correct_train\")\n", - " step_timer.start()\n", - " chunks = jungfrau_ff.chunk_Multi([data_corr,], block_size)\n", + " step_timer.done_step(\"correcting trains per chunk\", print_step=False)\n", "\n", - " with multiprocessing.Pool() as pool:\n", - "\n", - " partial_fill = partial(jungfrau_ff.fill_histogram, h_bins)\n", - " r_maps = pool.map(partial_fill, chunks)\n", + " step_timer.start()\n", + " chunks = jungfrau_ff.chunk_multi(data_corr, block_size)\n", + " ch_inp = [(c, h_bins) for c in chunks]\n", + " with multiprocessing.Pool(processes=min(n_cpus, len(chunks))) as pool:\n", + " results = pool.starmap(jungfrau_ff.fill_histogram, ch_inp)\n", "\n", - " for i, r in enumerate(r_maps):\n", - " h_chk, e_chk = r\n", - " if edges[da] is None:\n", - " edges[da] = np.array(e_chk)\n", + " for i, (h_chk, e_chk) in enumerate(results):\n", + " if edges[da] is None:\n", + " edges[da] = e_chk\n", "\n", - " n_blocks_col = int(h_spectra[da].shape[-1]/block_size[1])\n", - " irow = int(np.floor(i/n_blocks_col)) * block_size[0]\n", - " icol = i%n_blocks_col * block_size[1]\n", + " n_blocks_col = int(h_spectra[da].shape[-1]/block_size[1])\n", + " irow = int(np.floor(i/n_blocks_col)) * block_size[0]\n", + " icol = i%n_blocks_col * block_size[1]\n", + " h_spectra[da][..., irow:irow+block_size[0], icol:icol+block_size[1]] += h_chk\n", "\n", - " h_spectra[da][..., irow:irow+block_size[0], icol:icol+block_size[1]] += h_chk\n", - " step_timer.done_step(\"Histogram created\")" + " step_timer.done_step(\"Histogram created per chunk\", print_step=False)" ] }, { @@ -446,7 +460,7 @@ "for da in karabo_da:\n", " # transpose h_spectra for the following cells.\n", " h_spectra[da] = h_spectra[da]\n", - " fout_h_path = f'{out_folder}/R{runs[0]:04d}_{proposal.upper()}_Gain_Spectra_{da}_Histo.h5'\n", + " fout_h_path = out_folder / f\"R{runs[0]:04d}_{proposal.upper()}_Gain_Spectra_{da}_Histo.h5\"\n", " hists = h_spectra[da]\n", " with h5file(fout_h_path, 'w') as fout_h:\n", " print(f\"Saving histograms at 
{fout_h_path}.\")\n", @@ -463,6 +477,39 @@ " fout_h.attrs[\"creation_time\"] = str(creation_time)" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def create_and_fill_map(r_maps, shape, dtype=np.float32):\n", + "\n", + " g0_map = np.zeros(shape, dtype=dtype)\n", + " sigma_map = np.zeros(shape, dtype=dtype)\n", + " chi2ndf_map = np.zeros(shape, dtype=dtype)\n", + " alpha_map = np.zeros(shape, dtype=dtype)\n", + " n_blocks_col = int(shape[-1] / block_size[1])\n", + "\n", + " for i, (g0_chk, sigma_chk, chi2ndf_chk, alpha_chk) in enumerate(r_maps):\n", + "\n", + " irow = int(np.floor(i / n_blocks_col)) * block_size[0]\n", + " icol = i % n_blocks_col * block_size[1]\n", + "\n", + " slice_obj = (\n", + " ...,\n", + " slice(irow, irow + block_size[0]),\n", + " slice(icol, icol + block_size[1])\n", + " )\n", + "\n", + " g0_map[slice_obj] = g0_chk\n", + " sigma_map[slice_obj] = sigma_chk\n", + " chi2ndf_map[slice_obj] = chi2ndf_chk\n", + " alpha_map[slice_obj] = alpha_chk\n", + "\n", + " return g0_map, sigma_map, chi2ndf_map, alpha_map" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -480,60 +527,45 @@ "\n", "for da in karabo_da:\n", " _edges = np.array(edges[da])\n", - " x = (_edges[1:] + _edges[:-1]) / 2.0\n", + " bin_center = (_edges[1:] + _edges[:-1]) / 2.0\n", "\n", - " x, _h_spectra = jungfrau_ff.set_histo_range(x, h_spectra[da], fit_h_range)\n", - " print(f'adjusting histogram range: {x[0]} - {x[-1]}')\n", + " bin_center, _h_spectra = jungfrau_ff.set_histo_range(bin_center, h_spectra[da], fit_h_range)\n", + " print(f'adjusting histogram range: {bin_center[0]} - {bin_center[-1]}')\n", " print(f'Histo shape updated from {h_spectra[da].shape} to {_h_spectra.shape}')\n", - " print(f'x-axis: {x.shape}')\n", - " chunks = jungfrau_ff.chunk_Multi([_h_spectra], block_size)\n", - " pool = multiprocessing.Pool()\n", + " print(f'x-axis: {bin_center.shape}')\n", + " chunks = jungfrau_ff.chunk_multi(_h_spectra, block_size)\n", "\n", " partial_fit = partial(\n", " jungfrau_ff.fit_histogram,\n", - " x,\n", + " bin_center,\n", " fit_func,\n", " n_sigma,\n", - " rebin,\n", + " rebin_factor,\n", " ratio,\n", " const_data[da][\"Noise10Hz\"],\n", + " initial_sigma,\n", " )\n", - " print(\"starting spectra fit\")\n", + " print(f\"Starting spectra fit for {len(chunks)} chunks\")\n", + "\n", " step_timer.start()\n", " with multiprocessing.Pool() as pool:\n", " r_maps = pool.map(partial_fit, chunks)\n", " step_timer.done_step(\"r_maps calculation\")\n", "\n", " step_timer.start()\n", - " g0_map = np.zeros((memory_cells, sensor_size[0], sensor_size[1]), dtype=np.float32)\n", - " sigma_map = np.zeros((memory_cells, sensor_size[0], sensor_size[1]), dtype=np.float32)\n", - " chi2ndf_map = np.zeros((memory_cells, sensor_size[0], sensor_size[1]), dtype=np.float32)\n", - " alpha_map = np.zeros((memory_cells, sensor_size[0], sensor_size[1]), dtype=np.float32)\n", - " for i, r in enumerate(r_maps):\n", - " g0_chk, sigma_chk, chi2ndf_chk, alpha_chk = r\n", - "\n", - " n_blocks_col = int(g0_map.shape[-1] / block_size[1])\n", - " irow = int(np.floor(i / n_blocks_col)) * block_size[0]\n", - " icol = i % n_blocks_col * block_size[1]\n", - "\n", - " g0_map[..., irow : irow + block_size[0], icol : icol + block_size[1]] = g0_chk\n", - " sigma_map[..., irow : irow + block_size[0], icol : icol + block_size[1]] = sigma_chk\n", - " chi2ndf_map[\n", - " ..., irow : irow + block_size[0], icol : icol + block_size[1]\n", - " ] = chi2ndf_chk\n", - " alpha_map[..., irow : 
irow + block_size[0], icol : icol + block_size[1]] = alpha_chk\n", + " map_shape = (memory_cells, sensor_size[0], sensor_size[1])\n", + " g0_map, sigma_map, chi2ndf_map, alpha_map = create_and_fill_map(r_maps, map_shape)\n", " step_timer.done_step(\"Finished fitting\")\n", - " fout_path = f\"{out_folder}/{fout_temp.format(da)}\"\n", "\n", " step_timer.start()\n", + " fout_path = out_folder / fout_temp.format(da)\n", " with h5file(fout_path, \"w\") as fout:\n", " dset_chi2 = fout.create_dataset(\"chi2map\", data=np.transpose(chi2ndf_map))\n", " dset_gmap_fit = fout.create_dataset(\"gainMap_fit\", data=np.transpose(g0_map))\n", " dset_std = fout.create_dataset(\"sigmamap\", data=np.transpose(sigma_map))\n", " dset_alpha = fout.create_dataset(\"alphamap\", data=np.transpose(alpha_map))\n", - " dset_noi = fout.create_dataset(\n", - " \"noise_map\",\n", - " data=const_data[da][\"Noise10Hz\"])\n", + " dset_noi = fout.create_dataset(\"noise_map\", data=const_data[da][\"Noise10Hz\"])\n", + "\n", " fout.attrs[\"memory_cells\"] = memory_cells\n", " fout.attrs[\"integration_time\"] = integration_time\n", " fout.attrs[\"bias_voltage\"] = bias_voltage\n", @@ -561,9 +593,9 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.11" + "version": "3.11.9" } }, "nbformat": 4, - "nbformat_minor": 1 + "nbformat_minor": 4 } diff --git a/notebooks/Jungfrau/Jungfrau_Create_Gain_maps_NBC.ipynb b/notebooks/Jungfrau/Jungfrau_Create_Gain_maps_NBC.ipynb index 18b9c29c84232d1f514e60bddf6b34a621e96090..996b526210ebd5b5836a6a40dfdd0ab82cf96ad6 100755 --- a/notebooks/Jungfrau/Jungfrau_Create_Gain_maps_NBC.ipynb +++ b/notebooks/Jungfrau/Jungfrau_Create_Gain_maps_NBC.ipynb @@ -4,17 +4,30 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Create Gain Map\n", + "# Jungfrau Detector Flatfield Gain Calibration - Part 2: Gain Map Generation\n", "\n", "Author: European XFEL Detector Group, Version: 1.0\n", "\n", - "Converts the map with fit value of the photon peak into a gain conversion factor map.\n", - "\n", - "Calculates the bad pixels map according to:\n", - "\n", - "- Failed fit\n", - "- Not enough entries in the fit\n", - "- Gain value deviation too high" + "This notebook performs the final steps in the Jungfrau detector flatfield gain calibration process, focusing on gain map creation and refinement.\n", + "\n", + "#### Overview\n", + "\n", + "1. Load Fit Results: Import photon peak fit data from previous processing step (Histogram creation and Fitting)\n", + "2. Retrieve Old Gain Map: Access previous gain calibration from database or file\n", + "3. Calculate New Gain Map:\n", + " - Use fit results for high gain (G0)\n", + " - Apply gain ratios for medium (G1) and low (G2) gains\n", + "4. Evaluate Bad Pixels:\n", + " - Identify pixels with failed fits\n", + " - Flag pixels with insufficient data\n", + " - Detect pixels with high gain deviation\n", + "5. Correct Bad Pixels: Replace identified bad pixels with median values\n", + "6. Generate Bad Pixel Map: Create a map of all identified bad pixels\n", + "7. Save:\n", + " - Store new gain maps for all gain levels\n", + " - Save bad pixel map\n", + " - Optionally inject new calibration maps into database\n", + "8. 
Visualize Results: Plot gain maps and bad pixel distributions" ] }, { @@ -24,7 +37,8 @@ "outputs": [], "source": [ "in_folder = '/gpfs/exfel/exp/MID/202330/p900322/raw' # RAW data path, required\n", - "fit_data_folder = \"/gpfs/exfel/data/scratch/ahmedk/test/jf_ff/gain_maps\" # Output path for gain data, required\n", + "out_folder = \"/gpfs/exfel/data/scratch/ahmedk/test/jf_ff/gain_maps\" # Output path for gain data, required\n", + "fit_data_folder = \"\" # One can add fit data files in a separate path and point to it here.\n", "metadata_folder = '' # Directory containing calibration_metadata.yml when run by xfel-calibrate\n", "runs = [94] # Number of high gain\n", "\n", @@ -125,7 +139,9 @@ "outputs": [], "source": [ "run = runs[0]\n", - "fit_data_folder = Path(fit_data_folder)\n", + "out_folder = Path(out_folder)\n", + "if not fit_data_folder:\n", + " fit_data_folder = out_folder\n", "proposal = list(filter(None, in_folder.strip('/').split('/')))[-2]\n", "file_loc = raw_data_location_string(proposal=proposal, runs=runs)\n", "report = get_report(metadata_folder)\n", diff --git a/notebooks/Jungfrau/Jungfrau_gain_Spectra_Fit_Summary_NBC.ipynb b/notebooks/Jungfrau/Jungfrau_gain_Spectra_Fit_Summary_NBC.ipynb index 0faa46cd48713680a71ecc81889834e3e731dd5b..9e6021a32ef7b0846432f4041613a04938ecb298 100644 --- a/notebooks/Jungfrau/Jungfrau_gain_Spectra_Fit_Summary_NBC.ipynb +++ b/notebooks/Jungfrau/Jungfrau_gain_Spectra_Fit_Summary_NBC.ipynb @@ -38,22 +38,25 @@ "outputs": [], "source": [ "import math\n", + "import multiprocessing as mp\n", "import warnings\n", + "from IPython.display import Markdown, display\n", + "from logging import warning\n", "from pathlib import Path\n", "\n", "warnings.filterwarnings('ignore')\n", "\n", - "from h5py import File as h5file\n", "import matplotlib\n", "import matplotlib.pyplot as plt\n", "import numpy as np\n", + "from h5py import File as h5file\n", "\n", "matplotlib.use(\"agg\")\n", "%matplotlib inline\n", "\n", + "from cal_tools.calcat_interface import CalCatApi\n", "from cal_tools.plotting import init_jungfrau_geom\n", - "from cal_tools.restful_config import calibration_client\n", - "from cal_tools.calcat_interface import CalCatApi" + "from cal_tools.restful_config import calibration_client" ] }, { @@ -76,7 +79,16 @@ "calcat = CalCatApi(client=calcat_client)\n", "detector_id = calcat.detector(karabo_id)['id']\n", "da_mapping = calcat.physical_detector_units(detector_id, pdu_snapshot_at=creation_time)\n", - "da_to_pdu = {k: v[\"physical_name\"] for k, v in da_mapping.items()}" + "da_to_pdu = {k: v[\"physical_name\"] for k, v in da_mapping.items()}\n", + "run = runs[0] # TODO: Update for multiple runs\n", + "proposal = list(filter(None, in_folder.strip('/').split('/')))[-2]" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Histograms for all cells for each module" ] }, { @@ -85,37 +97,194 @@ "metadata": {}, "outputs": [], "source": [ - "proposal = list(filter(None, in_folder.strip('/').split('/')))[-2]\n", - "run = runs[0] # TODO this will need to be fixed when I start implementing multiple runs.\n", - "stacked_constants = np.full(geom.expected_data_shape, np.nan) # nmods, 512, 1024\n", - "\n", - "for i, da in enumerate (da_to_pdu.keys()):\n", - " with h5file(\n", - " Path(out_folder) / spectra_fit_temp.format(run, proposal.upper(), da, fit_func),\n", - " 'r'\n", - " ) as f:\n", - " # f[g0_fit_dataset] shape is 1024, 512, mem_cells\n", - " stacked_constants[i] = np.moveaxis(\n", - " np.mean(np.array(f[g0_fit_dataset]), axis=-1), 0, 
1)\n", - " \n", - "fig, ax = plt.subplots(figsize=(18, 10))\n", - "vmin, vmax = np.percentile(stacked_constants, [5, 95])\n", - "geom.plot_data_fast(\n", - " stacked_constants,\n", - " ax=ax,\n", - " vmin=vmin,\n", - " vmax=vmax,\n", - " colorbar={'shrink': 1, 'pad': 0.01},\n", - ")\n", - "ax.set_title(f'{karabo_id} - One photon peak position', size=18)\n", + "def process_histogram(file_path):\n", + " try:\n", + " with h5file(file_path, 'r') as f:\n", + " histos = f[\"histos\"][:]\n", + " edges = f[\"edges\"][:]\n", + " except Exception as e:\n", + " warning(f\"Error while loading Histogram file {file_path}: {e}\")\n", + "\n", + " return None, None\n", + "\n", + " bin_centers = (edges[1:] + edges[:-1]) / 2\n", + " mean_histos = histos.mean(axis=(2, 3)) # Shape: (bins, cells)\n", + "\n", + " return bin_centers, mean_histos" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "ncols = min(4, nmods)\n", "nrows = math.ceil(nmods / ncols)\n", "\n", "fig, axs = plt.subplots(\n", " nrows, ncols, figsize=((5*ncols)//1.8, (5*nrows + 1)//1.8), squeeze=False)\n", "axs = axs.flatten()\n", "\n", "# Use a consistent color cycle for all subplots\n", "custom_colors = [\n", " '#0000FF', '#FF0000', '#00FF00', '#FFFF00', '#FF00FF', '#FFA500', \n", " '#800080', '#008000', '#000080', '#FFC0CB', '#A52A2A', '#808080', \n", " '#FFD700', '#8B4513', '#FF4500', '#2E8B57'\n", "]\n", "\n", "with mp.Pool(processes=min(mp.cpu_count(), nmods)) as pool:\n", " file_paths = [Path(out_folder) / histo_temp.format(run, proposal.upper(), da) for da in da_to_pdu.keys()]\n", " results = pool.map(process_histogram, file_paths)\n", "\n", "for i, ((da, pdu), (bin_centers, mean_histos)) in enumerate(zip(da_to_pdu.items(), results)):\n", " ax = axs[i]\n", "\n", " # Missing histogram data for module\n", " if bin_centers is None:\n", " continue\n", "\n", " for m in range(mean_histos.shape[1]): # Iterate over cells\n", " ax.semilogy(\n", " bin_centers, mean_histos[:, m],\n", " color=custom_colors[m],\n", " # Label only in the first subplot, as the legend is shared.\n", " label=f'Cell {m}' if i == 0 else \"\"\n", " )\n", "\n", " ax.set_title(f\"{da} ({pdu})\")\n", " ax.set_xlabel(\"ADC value\")\n", " ax.set_ylabel(\"Counts\")\n", "\n", "# Hide unused subplots\n", "for i in range(nmods, len(axs)):\n", " axs[i].set_visible(False)\n", "\n", "plt.tight_layout()\n", "\n", "# Add a single legend at the top of the figure\n", "handles, labels = axs[0].get_legend_handles_labels()\n", "fig.legend(\n", " handles, labels, loc='upper center', ncol=min(len(labels), 8), \n", " bbox_to_anchor=(0.5, 1.05), fontsize='small')\n", "plt.subplots_adjust(top=0.9)\n", "plt.show()" ] }, { - "cell_type": "markdown", + "cell_type": "code", + "execution_count": null, "metadata": {}, + "outputs": [], "source": [ - "## Histogram data for all cells for each module" + "display(Markdown(f\"## Display fitting results using {fit_func}\"))\n", + "\n", + "def plot_stacked_heatmap(geom, stacked_constants, title):\n", + " _, ax = plt.subplots(figsize=(8, 6))\n", + " vmin, vmax = np.nanpercentile(stacked_constants, [5, 95])\n", + " geom.plot_data(\n", + " stacked_constants,\n", + " ax=ax,\n", + " vmin=vmin,\n", + " vmax=vmax,\n", + " interpolation=\"None\",\n", + " colorbar={'shrink': 1, 'pad': 0.01, 'label': title},\n", + " )\n", + " ax.set_title(title, size=12)\n", + " plt.show()\n", + "\n", + "\n", + "def 
plot_cells_comparison(data, title):\n", + " n_cells = data.shape[-1]\n", + "\n", + " if n_cells == 1: # no need to plot for single cell\n", + " return\n", + "\n", + " # For multiple cells, create a grid layout\n", + " n_cols = min(4, n_cells) # Max 4 columns\n", + " n_rows = math.ceil(n_cells / n_cols)\n", + "\n", + " fig, axes = plt.subplots(\n", + " n_rows, n_cols, figsize=(5*n_cols//2, 5*n_rows//2))\n", + " fig.suptitle(title, fontsize=12)\n", + "\n", + " # Flatten axes array for easy indexing\n", + " axes = axes.flatten() if n_cells > 1 else [axes]\n", + " vmin, vmax = np.nanpercentile(data, [5, 95])\n", + "\n", + " for i in range(n_cells):\n", + " ax = axes[i]\n", + " im = ax.imshow(\n", + " data[..., i],\n", + " cmap='viridis',\n", + " vmin=vmin,\n", + " vmax=vmax,\n", + " interpolation=\"None\",\n", + " )\n", + " ax.set_title(f'Cell {i}', size=10)\n", + " plt.colorbar(im, ax=ax)\n", + "\n", + " # Hide unused subplots\n", + " for i in range(n_cells, len(axes)):\n", + " axes[i].set_visible(False)\n", + "\n", + " plt.tight_layout()\n", + " plt.show()\n", + "\n", + "\n", + "def plot_histogram_of_values(data, title, bins=100):\n", + " data = data.flatten()\n", + "\n", + " # Count failed fittings. -1000 and -1 used for failed fitting.\n", + " failed_percentage = (np.sum((data == -1000) | (data == -1)) / len(data)) * 100\n", + " \n", + " # Separate valid data and failed fittings\n", + " valid_data = data[(data != -1000) & (data != -1)]\n", + " plt.figure(figsize=(6, 3))\n", + "\n", + " # Use Interquartile Range (IQR) for defining histogram range.\n", + " q1, q3 = np.percentile(valid_data, [25, 75])\n", + " iqr = q3 - q1\n", + " lower = max(q1 - 1.5 * iqr, np.min(valid_data))\n", + " upper = min(q3 + 1.5 * iqr, np.max(valid_data))\n", + "\n", + " plt.hist(data, bins=bins, range=(lower, upper), edgecolor='black')\n", + " plt.xlabel('Value')\n", + " plt.ylabel('Count')\n", + " plt.title(title, size=12)\n", + "\n", + " mean = np.mean(valid_data)\n", + " median = np.median(valid_data)\n", + " plt.axvline(\n", + " mean,\n", + " color='r',\n", + " linestyle='dashed',\n", + " linewidth=2,\n", + " label=f'Mean: {mean:.2f}'\n", + " )\n", + " plt.axvline(\n", + " median,\n", + " color='g',\n", + " linestyle='dashed',\n", + " linewidth=2,\n", + " label=f'Median: {median:.2f}'\n", + " )\n", + "\n", + " # Add text box with failed fitting percentage.\n", + " plt.text(\n", + " 0.05, 0.95,\n", + " f\"Failed Fittings:\\n {failed_percentage:.2f}%\",\n", + " transform=plt.gca().transAxes, \n", + " verticalalignment='top',\n", + " bbox=dict(\n", + " boxstyle='round',\n", + " facecolor='white',\n", + " alpha=0.8),\n", + " fontsize=10)\n", + "\n", + " plt.legend()\n", + " plt.tight_layout()\n", + " plt.show()" ] }, { @@ -124,34 +293,38 @@ "metadata": {}, "outputs": [], "source": [ - "if nmods > 4:\n", - " fixed_cols = 4\n", - " row, col = math.ceil(nmods / fixed_cols), 4\n", - "else:\n", - " row, col = 1, nmods\n", - "\n", - "fig, axs = plt.subplots(row, col, figsize=(20, 10)) # Adjust for spatial histograms\n", - "axs = axs.ravel()\n", - "for i, da in enumerate (da_to_pdu.keys()):\n", - " with h5file(\n", - " Path(out_folder) / histo_temp.format(run, proposal.upper(), da),\n", - " 'r'\n", - " ) as f:\n", - " histos = f[\"histos\"][:]\n", - " row, col = divmod(i, 4)\n", - " for m in range(histos.shape[1]):\n", - " cell_hist = histos[m]\n", - " axs[i].plot(cell_hist.ravel(), label=f'Cell {m}')\n", - " axs[i].set_title(f\"{da} ({da_to_pdu[da]})\")\n", - "\n", - "for i, ax in enumerate(axs):\n", - " if i > 
nmods-1:\n", - " ax.set_visible(False) # Hide unused subplots\n", - "# Create a legend for the whole figure\n", - "handles, labels = axs[0].get_legend_handles_labels()\n", - "fig.legend(handles, labels, loc=\"center right\" if nmods > 4 else \"upper right\")\n", - "plt.tight_layout(pad=3)\n", - "plt.show()" + "# Define datasets to plot\n", + "datasets = {\n", + " \"gainMap_fit\": 'Single Photon Peak Position (ADU)',\n", + " 'sigmamap': 'Peak Width (σ)',\n", + " 'alphamap': 'Charge Sharing Probability (α)', # 'Charge sharing parameter'\n", + " 'chi2map': 'Goodness of Fit (χ²/ndf)' # 'Chi-square of fit'\n", + "}\n", + "\n", + "for dataset, title in datasets.items():\n", + " stacked_constants = np.full(geom.expected_data_shape, np.nan)\n", + " all_data = []\n", + " av_modules = []\n", + "\n", + " for i, da in enumerate(da_to_pdu.keys()):\n", + " file_path = Path(out_folder) / spectra_fit_temp.format(run, proposal.upper(), da, fit_func)\n", + " try:\n", + " with h5file(file_path, 'r') as f:\n", + " data = np.array(f[dataset])\n", + " stacked_constants[i] = np.moveaxis(np.mean(data, axis=-1), 0, 1)\n", + " all_data.append(data)\n", + " av_modules.append(da)\n", + " except Exception as e:\n", + " warning(f\"Error while loading Fitting file {file_path}: {e}\")\n", + "\n", + " # Plot stacked heatmap\n", + " plot_stacked_heatmap(geom, stacked_constants, title)\n", + "\n", + " plot_histogram_of_values(\n", + " np.concatenate(all_data), f'Distribution of {title}')\n", + "\n", + " # Plot cell comparison for the last module\n", + " plot_cells_comparison(all_data[-1], f'{title} - Module {av_modules[-1]}({da_to_pdu[av_modules[-1]]})')" ] } ], @@ -171,10 +344,9 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.11" - }, - "orig_nbformat": 4 + "version": "3.11.8" + } }, "nbformat": 4, - "nbformat_minor": 2 + "nbformat_minor": 4 } diff --git a/src/cal_tools/calcat_interface.py b/src/cal_tools/calcat_interface.py index fa9425ee5e076a1d42f284d9536a199b17b009d0..8da46665aef0dc25cf9afdbbc2ff0e684589b68f 100644 --- a/src/cal_tools/calcat_interface.py +++ b/src/cal_tools/calcat_interface.py @@ -1243,9 +1243,9 @@ class JUNGFRAU_CalibrationData(CalibrationData): sensor_bias_voltage, memory_cells, integration_time, - exposure_timeout, gain_setting, gain_mode=None, + exposure_timeout=25, sensor_temperature=291, pixels_x=1024, pixels_y=512, diff --git a/src/cal_tools/jungfrau/jungfrau_ff.py b/src/cal_tools/jungfrau/jungfrau_ff.py index 89d19581fad9e54ea4664b46df097f1b74b4bd9a..c4f6d33b7854f4d44ee95716da512cb68a2fae11 100644 --- a/src/cal_tools/jungfrau/jungfrau_ff.py +++ b/src/cal_tools/jungfrau/jungfrau_ff.py @@ -1,137 +1,120 @@ import numpy as np -from XFELDetAna.detectors.jungfrau.fitFuncs import ( - charge_sharing, - chi_square_fit, - double_peak, - gauss, - histogram_errors, - ) -import time +from scipy.special import erf +from iminuit import Minuit -def chunk_trains(data, train_chk): """ - chunks data along the train dimension - - arguments: - * data (list): input data arrays, each at least 1-D - * train_chk (int): number of trains per chunk - - retruns: list fo chunked data - """ - n_trains = int(data[0].shape[0]) - chunks = [] - for i in range(0, n_trains, train_chk): - this_chunk = [] - this_chunk = [data[0][i:min(i+train_chk, n_trains)]] - for j in range(1, len(data)): - if data[j] is not None: - this_chunk.append(data[j][i:min(i+train_chk, n_trains)]) - else: - this_chunk.append(None) - - chunks.append(this_chunk) 
- return chunks + Calculate the errors for histogram bins. + This function computes the error for each bin in a histogram, + treating positive and negative bin counts differently. -def chunk_Multi(data, block_size): - """ - chunks input array along the 'row' and 'col' dimensions - - arguments: - * data (list): input data arrays, each at least 2-D - * block_size (list, int): [chunk_row, chunk_col], dimension of the 2-D chunk - - retruns: list fo chunked data + Args: + bin_counts (np.ndarray): Input array of histogram bin counts. + + Returns: + np.ndarray: Array of calculated errors. """ - intervals_r = range(0, data[0].shape[-2], block_size[0]) - intervals_c = range(0, data[0].shape[-1], block_size[1]) - - chunks = [] - - for irow in intervals_r: - for icol in intervals_c: - this_chunk = [] - this_chunk = [data[0][..., irow:irow + block_size[0], icol:icol + block_size[1]]] - for j in range(1, len(data)): - if data[j] is not None: - this_chunk.append(data[j][..., irow:irow + block_size[0], icol:icol + block_size[1]]) - else: - this_chunk.append(None) - chunks.append(this_chunk) + out = np.ones_like(bin_counts) - return chunks + # Handle positive values + positive_mask = bin_counts > 0 + out[positive_mask] = np.sqrt(bin_counts[positive_mask]) + + # Handle negative values + negative_mask = bin_counts < 0 + out[negative_mask] = np.sqrt(-bin_counts[negative_mask]) + return out -def subtract_offset(offset_map, _inp): +def chunk_multi(data, block_size): """ - perform offset subtraction on raw data + Chunks input array along the 'row' and 'col' dimensions. Args: - * offset_map (xarray, float): map with offset constants, - with shape (3, memory_cells, row, col) - * _inp (list): input data as: - * _inp[0]: raw images, with shape (trains, memory_cells, row, col) - * _inp[1]: gain bit map, with shape (trains, memory_cells, row, col) + data (ndarray): input data array, at least 2D. + block_size (list or tuple of int): [chunk_row, chunk_col], + dimension of the 2-D chunk. + + Returns: list of chunked data. - Returns: offset subtracted images + Raises: + ValueError: If input data is not at least 2D or block_size is invalid. """ - imgs = _inp[0] - gbit = _inp[1] + if not isinstance(data, np.ndarray): + raise ValueError("Input data must be a numpy array.") + if data.ndim < 2: + raise ValueError("Input data must be at least 2D.") + if ( + not isinstance(block_size, (list, tuple)) or + len(block_size) != 2 or + (not all(isinstance(x, int) and x > 0 for x in block_size)) + ): + raise ValueError( + "block_size must be a list or tuple of " + "two positive integers.") + + rows, cols = data.shape[-2:] + chunk_rows, chunk_cols = block_size + + if chunk_rows > rows or chunk_cols > cols: + raise ValueError( + "block_size dimensions cannot be larger " + "than the input array dimensions.") - n_cells = imgs.shape[1] + chunks = [] + for i in range(0, rows, chunk_rows): + for j in range(0, cols, chunk_cols): + chunk = data[ + ..., + i: i+chunk_rows, + j: j+chunk_cols + ] + chunks.append(chunk) - for cell in range(n_cells): - this_cell_gbit = gbit.sel(cell=cell) + return chunks - this_cell_off = offset_map[:, cell] - _o = np.choose( - this_cell_gbit, ( - np.expand_dims(this_cell_off[0], axis=0), - np.expand_dims(this_cell_off[1], axis=0), - np.expand_dims(this_cell_off[2], axis=0) - ) - ) +def fill_histogram(image_data, histogram_bins): + """ + Fill a histogram from input images. + This function creates a histogram with shape + (n_bins-1, memory_cells, n_rows, n_cols) from the input image data. 
- imgs.loc[dict(cell=cell)] -= _o + Args: + image_data (np.ndarray): image data to histogram + (trains, memory_cells, row, col). + histogram_bins (list, float): the binning of the x-axis. - return imgs + Returns: histogram bin counts, bin edges. -def fill_histogram(h_bins, _inp): + Raises: + TypeError: If image_data is not a numpy ndarray. + ValueError: If image_data is not a 4D array. """ - fills an histogram with shape (n_bins-1, memory_cells, n_rows, n_cols) from input images - - arguments: - * h_bins (list, float): the binning of the x-axis - * _inp (list): data to be histogrammed, as: - * _inp[0]: images, with shape (trains, memory_cells, row, col) + if not isinstance(image_data, np.ndarray): + raise TypeError("Expected image_data to be a numpy ndarray.") + + if image_data.ndim != 4: + raise ValueError("Expected image_data to be a 4D array.") + + n_cells, n_rows, n_cols = image_data.shape[1:] + + h_chk = np.zeros( + (len(histogram_bins)-1, n_cells, n_rows, n_cols), + dtype=np.int32) - retuns: histogram bin counts, bin edges - - """ - imgs = _inp[0] # [trains, cells, rows, cols] - n_cells = imgs.shape[1] - n_rows = imgs.shape[2] - n_cols = imgs.shape[3] - - h_chk = np.zeros((len(h_bins)-1, n_cells, n_rows, n_cols), dtype=np.int32) - e_chk = None - for cell in range(n_cells): for row in range(n_rows): for col in range(n_cols): - this_cell = imgs[:, cell, ...] - this_pix = this_cell[:, row, col] - - _h, _e = np.histogram(this_pix, bins=h_bins) + this_pix = image_data[:, cell, row, col] + _h, _e = np.histogram(this_pix, bins=histogram_bins) h_chk[..., cell, row, col] += _h - if e_chk is None: - e_chk = np.array(_e) - - return h_chk, e_chk + e_chk = np.array(_e) + return h_chk, e_chk # peak finder to find the first photon peak and/or the pedestal peak @@ -141,15 +124,16 @@ """ finds peaks in the histogram very rough, self-made function, should be replaced by better one - + arguments: * x (list, float): histogram bin centers * h (list, float): histogram bin counts * thr (int): lower threshold along x-axis in ADC units to look for a peak * dist (float): minimal distance between the last found peak and the next * ratio (float): minimal ratio between the last found peak and the next - - returns: list of sorted peak positions along the x-axis, index of the peak with lowes value along x-axis + + returns: list of sorted peak positions along the x-axis, index + of the peak with lowest value along x-axis """ imin = 0 peak_bins = [] @@ -163,7 +147,7 @@ peak_bins.append(sorted_i[-1] + imin) low_h = ratio * h[sorted_i[-1] + imin] for j in range(1, len(sorted_i)): - ibin = sorted_i[-1 -j] + imin + ibin = sorted_i[-1 - j] + imin if h[ibin] > low_h: min_dist = (x[peak_bins] - x[ibin])**2. > dist**2. 
if np.all(min_dist): @@ -176,317 +160,481 @@ return peak_bins, imin -def fit_charge_sharing(x, y, yerr, sigma0, n_sigma, ratio): +def set_histo_range(bin_centers, histogram, h_range): """ - fits histogram with single photon function with charge sharing tail + Reduces the histogram bin axis to the range specified - arguments: - * x (array, float): x values - * y (array, float): y values - * yerr (array, float): errors of the y values - * sigma0 (float): rough estimate of peak variance - * n_sigma (int): to calculate threshold of the peak finder as thr = n_sigma * sigma0 - * ratio (float): ratio parameter of the peak finder - - returns: peak position, peak variance, chi2/ndf, charge sharing parameter - """ + Args: + bin_centers (array, float): the bin centers array + histogram (array, integers): the histogram with shape + (bins, cells, rows, columns) + h_range (list, float): the (lower, upper) limits of the desired range - norm = np.sum(y) * (x[1] - x[0]) - alpha0 = 0.3 - thr = n_sigma * sigma0 - _peaks, imin = _peak_position(x, y, thr, ratio=ratio) - - q = -1. - sigma = -1. - alpha = -1. - chi2ndf = -1. - q0 = -1. + Returns: the new bin centers array and the new histogram - if len(_peaks) > 0: - q0 = np.ma.min(x[_peaks]) - - if q0 > -1.: - limit_q = (q0 - sigma0, q0 + sigma0) - limit_sigma = (0.1 * sigma0, 2. * sigma0) - limit_alpha = (0.01 * alpha0, 1.) - limit_norm = (np.sqrt(0.1 * norm), 3. *norm) - - res = chi_square_fit( - charge_sharing, - x, - y, - yerr, - fit_range=(0.5 * q0, q0 + 2. * sigma0), - ped=0., - q=q0, - sigma=sigma0, - alpha=alpha0, - norm=norm, - limit_q=limit_q, - limit_sigma=limit_sigma, - limit_alpha=limit_alpha, - limit_norm=limit_norm, - fix_ped=True, - print_level=0, - pedantic=False, - ) - values = res[0] - errors = res[1] - chi2 = res[2] - ndf = res[3] - is_valid = res[4] - - if is_valid: - q = values['q'] - sigma = values['sigma'] - alpha = values['alpha'] - chi2ndf = chi2/ndf - else: - q = -1000. - sigma = -1000. - alpha = -1000. - chi2ndf = -1000. + """ + if len(h_range) != 2: + raise ValueError("h_range should have two values.") + min_val, max_val = sorted(h_range) - return q, sigma, chi2ndf, alpha + i_min = np.abs(bin_centers - min_val).argmin() + i_max = np.abs(bin_centers - max_val).argmin() + # Ensure i_max is greater than i_min + if i_max <= i_min: + i_max = i_min + 1 -def fit_double_charge_sharing(x, y, yerr, sigma0, n_sigma, ratio): - """ - fits histogram with sum of two single photon functions with charge sharing tail - - arguments: - * x (array, float): x values - * y (array, float): y values - * yerr (array, float): errors of the y values - * sigma0 (float): rough estimate of peak variance - * n_sigma (int): to calculate threshold of the peak finder as thr = n_sigma * sigma0 - * ratio (float): ratio parameter of the peak finder - - returns: peak position, peak variance, chi2/ndf, charge sharing parameter (all of them of the first peak) + return bin_centers[i_min:i_max], histogram[i_min:i_max, ...] - """ - norm = np.sum(y) * (x[1] - x[0]) - alpha0 = 0.3 - thr = n_sigma * sigma0 - _peaks, imin = _peak_position(x, y, thr, ratio=ratio) - - qa = -1. - if len(_peaks) > 0: - qa = np.ma.min(x[_peaks]) - - q = -1. - sigma = -1. - alpha = -1. - chi2ndf = -1. - - if qa > -1.: - - qb = 1.1 * qa - limit_q_a = (qa - sigma0, qa + sigma0) - limit_sigma_a = (0.1 * sigma0, 2. * sigma0) - limit_alpha = (0.01, 1.) - limit_norm_a = (np.ma.sqrt(0.1 *norm), 2. 
* norm) - limit_q_b = (qb - sigma0, qb + sigma0) - limit_sigma_b = (0.1 * sigma0, 2. * sigma0) - limit_norm_b = (np.ma.sqrt(0.1 * 0.2 * norm), 2 * 0.2 *norm) - - res = chi_square_fit( - double_peak, x, y, yerr, - fit_range=(0.5*qa, qb + sigma0), - ped=0., - q_a=qa, - sigma_a=sigma0, - alpha=alpha0, - norm_a=norm, - q_b=qb, - sigma_b=sigma0, - norm_b=norm, - limit_q_a=limit_q_a, - limit_sigma_a=limit_sigma_a, - limit_alpha=limit_alpha, - limit_norm_a=limit_norm_a, - limit_q_b=limit_q_b, - limit_sigma_b=limit_sigma_b, - limit_norm_b=limit_norm_b, - fix_ped=True, - print_level=0, - pedantic=False, - ) - values = res[0] - errors = res[1] - chi2 = res[2] - ndf = res[3] - is_valid = res[4] - - if is_valid: - q = values['q_a'] - sigma = values['sigma_a'] - alpha = values['alpha'] - chi2ndf = chi2/ndf - else: - q = -1000. - sigma = -1000. - alpha = -1000. - chi2ndf = -1000. - - return q, sigma, chi2ndf, alpha - -def fit_gauss(x, y, yerr, sigma0, n_sigma, ratio): +def rebin_histo(hist, bin_centers, rebin_factor): """ - fits histogram with a gaussian function - - arguments: - * x (array, float): x values - * y (array, float): y values - * yerr (array, float): errors of the y values - * sigma0 (float): rough estimate of peak variance - * n_sigma (int): to calculate threshold of the peak finder as thr = n_sigma * sigma0 - * ratio (float): ratio parameter of the peak finder - - returns: peak position, peak variance, chi2/ndf, charge sharing parameter (last one alway == 0) - all of them are kept for compatibility with the wrap around function - + Rebin a histogram by combining adjacent bins. + + Args: + hist (np.ndarray): Input histogram values. + bin_centers (np.ndarray): Bin centers. + rebin_factor (int): Rebinning factor. Number of adjacent + bins to combine. + + Returns: + Tuple[np.ndarray, np.ndarray]: Rebinned histogram values and + corresponding bin centers. + + Raises: + ValueError: If the rebinning factor is not an exact divisor + of the number of bins. """ - norm = np.sum(y) * (x[1] - x[0])/np.sqrt(2.*np.pi*sigma0**2) - alpha0 = 0.3 - thr = n_sigma * sigma0 - _peaks, imin = _peak_position(x, y, thr, ratio=ratio) - - q0 = -1000. - if len(_peaks) > 0: - q0 = np.ma.min(x[_peaks]) - - q = -1. - sigma = -1. - alpha = -1. - chi2ndf = -1. - - if q0 > -1000.: - limit_amp = (0.01*norm, 2.*norm) - limit_mean = (q0-sigma0, q0+sigma0) - limit_sigma = (0.1*sigma0, 3.*sigma0) - - res = chi_square_fit( - gauss, x, y, yerr, - fit_range=[q0 - sigma0, q0 + 2.*sigma0], - amp=norm, - mean=q0, - sigma=sigma0, - limit_amp=limit_amp, - limit_mean=limit_mean, - limit_sigma=limit_sigma, - print_level=0, - pedantic=False, - ) - values = res[0] - errors = res[1] - chi2 = res[2] - ndf = res[3] - is_valid = res[4] - - if is_valid: - q = values['mean'] - sigma = values['sigma'] - alpha = 0. - chi2ndf = chi2/ndf - else: - q = -1000. - sigma = -1000. - alpha = -1000. - chi2ndf = -1000. 
- - return q, sigma, chi2ndf, alpha + if rebin_factor == 1: + return hist, bin_centers + # Check if rebinning is possible + if len(hist) % rebin_factor != 0: + raise ValueError( + "Rebin factor is not an exact divisor " + "of the number of bins.") -def set_histo_range(_x, _h, _h_range): - """ - reduces the histogram bin axis to the range specified - - args: - * _x (array, float): the bin centers array - * _h (array, integers): the histogram with shape (bins, cells, columns, row) - * _h_range (tuple, float): the (liminf, limsup) of the desired range - - returns the new bin centers array and the new histogram - - """ - i_min = (np.abs(_x - _h_range[0])).argmin() - i_max = (np.abs(_x - _h_range[1])).argmin() - - return _x[i_min:i_max], _h[i_min:i_max] - -def rebin_histo(h, x, r): - if len(x)%r == 0: - h_out = None - x_out = x[::r] - - for i in range(0, len(h), r): - if h_out is not None: - h_out = np.append(h_out, np.sum(h[i:i+r], axis=0)) - else: - h_out = np.array(np.sum(h[i:i+r], axis=0)) - - else: - raise AttributeError('Rebin factor is not exact divisor of bins!') - + h_out = np.sum( + hist.reshape(-1, rebin_factor), + axis=1, + ) + x_out = bin_centers[::rebin_factor] return h_out, x_out -def fit_histogram(x, _fit_func, n_sigma, rebin, ratio, noise, _inp): +def fit_histogram( + bin_centers, + fit_func, + n_sigma, + rebin, + ratio, + noise, + initial_sigma, + histo, # Added at the end to parallelize with multiprocessing. +): """ - wrap around function for fitting of histogram - - arguments: - * x (list, float): - bin centers along x - * _fit_func (string): which function to use for fit - * CHARGE_SHARING: single peak with charge sharing tail - * CHARGE_SHARING_2: sum of two CHARGE_SHARING - * GAUSS: gaussian function - * n_sigma (int): to calculate threshold of the peak finder as thr = n_sigma * sigma0 - * ratio (float): ratio parameter of the peak finder - * _input (list): contains the data to preform the fit - * _input[0]: histogram bin counts with shape (n_bins, memory_cells, row, col) - * _input[1]: noise map with shape (3, memory_cells, row, col) - - returns: map of peak values, map of peak variances, map of chi2/ndf, map of charge sharing parameter values + Wrap around function for fitting of histogram + + Args: + bin_centers (list, float): bin centers along x + fit_func (string): which function to use for fit + - CHARGE_SHARING: single peak with charge sharing tail + - CHARGE_SHARING_2: sum of two CHARGE_SHARING + - GAUSS: gaussian function + n_sigma (int): to calculate threshold of the peak finder as + thr = n_sigma * initial_sigma + rebin (int): number of adjacent bins to combine before fitting + ratio (float): ratio parameter of the peak finder + noise (ndarray): noise map with shape (3, memory_cells, row, col); + a positive G0 value overrides initial_sigma for that pixel + initial_sigma (float): initial estimate of the peak width + histo (ndarray): histogram bin counts with shape + (n_bins, memory_cells, row, col) + + Returns: + - map of peak values + - map of peak variances + - map of chi2/ndf + - map of charge sharing parameter values """ _funcs = dict( - CHARGE_SHARING=fit_charge_sharing, - GAUSS=fit_gauss, + CHARGE_SHARING=fit_charge_sharing, CHARGE_SHARING_2=fit_double_charge_sharing, + GAUSS=fit_gauss, ) - fit_func = _funcs[_fit_func] - - histo = _inp[0] + fit_func = _funcs[fit_func] n_cells, n_rows, n_cols = histo.shape[1:] - sigma0 = 15. 
- _init_array = np.zeros((n_cells, n_rows, n_cols), dtype=np.float32) - g0_chk = _init_array.copy() + peak_positions = _init_array.copy() sigma_chk = _init_array.copy() chi2ndf_chk = _init_array.copy() alpha_chk = _init_array.copy() - + for cell in range(n_cells): for row in range(n_rows): for col in range(n_cols): - - y = histo[:, cell, row, col] - y, _x = rebin_histo(y, x, rebin) + y = histo[:, cell, row, col] + y, x_rebinned = rebin_histo(y, bin_centers, rebin) yerr = histogram_errors(y) - + if noise[0, cell, row, col] > 0.: - sigma0 = noise[0, cell, row, col] + initial_sigma = noise[0, cell, row, col] - q, sigma, chi2ndf, alpha = fit_func(_x, y, yerr, sigma0, n_sigma, ratio) + q, sigma, chi2ndf, alpha = fit_func( + x_rebinned, y, yerr, initial_sigma, n_sigma, ratio) - g0_chk[cell, row, col] = q + peak_positions[cell, row, col] = q sigma_chk[cell, row, col] = sigma chi2ndf_chk[cell, row, col] = chi2ndf alpha_chk[cell, row, col] = alpha + return peak_positions, sigma_chk, chi2ndf_chk, alpha_chk + + +def erfbox(x, ped, q, sigma): + """ + Compute the error function box used in charge sharing calculations. + + This function models the box-like shape of charge collection efficiency. + + erfbox is mostly flat between ped and q and quickly goes to zero + outside the range [ped, q]. The function integral is normalized to 1. + The slope of the box sides is given by erf(x)/sigma; if sigma == 0 then the + function goes abruptly to 0 at the edges. + + Args: + x (array-like): Input values + ped (float): Pedestal value + q (float): Charge value + sigma (float): Width parameter + + Returns: + array-like: Computed erfbox values + """ + if ped == q: + return np.zeros_like(x) + if ped > q: + q, ped = ped, q + + slope = lambda x: erf(x / abs(sigma)) if sigma != 0 else 2 * np.heaviside(x, 1) # noqa + return (slope(x - ped) - slope(x - q)) * 0.5 / (q - ped) + + +def gauss(x, amp, mean, sigma): + """ + Compute a Gaussian function. + + This function calculates the values of a Gaussian distribution + for given input parameters. + """ + z = (x - mean) / sigma + return amp * np.exp(-0.5 * z**2) + + +def charge_sharing(x, ped, q, sigma, alpha, norm): + """ + Model the detector response to a single photon with charge sharing. + + This function combines a Gaussian peak + (representing full charge collection) with a charge sharing tail. + + Args: + x (array-like): Input charge values + ped (float): Pedestal value (usually 0 for pedestal-subtracted data) + q (float): Peak position (full charge collection) + sigma (float): Peak width + alpha (float): Charge sharing probability + norm (float): Normalization factor + + Returns: + array-like: Modeled detector response + + Notes: More details in JOURNAL OF SYNCHROTRON RADIATION 23, 1462-1473 (2016) + - f(x) = norm * (gaussian_peak + charge_sharing_tail) / ((1 + α)²) + """ + A = ((1. - alpha)**2) / (sigma * np.sqrt(2. * np.pi)) + B = 4. * alpha * (1. - alpha) + C = 4. * alpha**2 + + charge_sh_tail = ( + B - C * np.log1p((x - ped) / (q - ped))) * erfbox(x, ped, q, sigma) + return norm * (charge_sh_tail + gauss(x, A, q, sigma)) / ((1. + alpha)**2) + + +def double_peak(x, ped, q_a, sigma_a, alpha, norm_a, q_b, sigma_b, norm_b): + """ + Model the detector response for two overlapping peaks with charge sharing. + + This function combines two charge sharing models to represent two + overlapping peaks in the detector response. 
+ """ + k_a = charge_sharing(x, ped, q_a, sigma_a, alpha, norm_a) + k_b = charge_sharing(x, ped, q_b, sigma_b, alpha, norm_b) + return k_a + k_b + + +def set_fit_range(x, x1, x2): + """ + Set the range of x values to be used in fitting. + + This function determines the indices of x that fall within the + specified range [x1, x2]. + + Notes: If x2 <= x1, the function ensures at least two points are included + in the range. The returned x values are cast to np.float64 for + higher precision in fitting. + """ + i1 = 0 + i2 = len(x) + i = 0 + + if x2 > x1: + if x1 > x[0]: + while i < len(x) and x[i] < x1: + i += 1 + i1 = i + else: + i1 = 0 + if x2 < x[-1]: + while i < len(x) and x[i] < x2: + i += 1 + i2 = i + else: + i2 = len(x) + else: + i2 = i1 + + if i2 <= i1: + i2 = i1 + 2 + # Use np.float64 for higher precision when fitting. + # With this higher precision we avoid failed fitting + # on very small difference. + return x[i1:i2].astype(np.float64), i1, i2 + - return g0_chk, sigma_chk, chi2ndf_chk, alpha_chk +def fit_charge_sharing( + x, y, yerr, initial_sigma, n_sigma, ratio, alpha0=0.3 +): + """ + Perform charge sharing fit using iminuit optimization. + + This function fits the charge sharing model to the provided data using + chi-square minimization via iminuit. + + Args: + x (array-like): Input charge values + y (array-like): Observed counts or intensities + yerr (array-like): Errors on y values + initial_sigma (float): Initial guess for peak width + n_sigma (float): Number of sigmas for the peak finder threshold + ratio (float): Ratio parameter of the peak finder + alpha0: Initial guess for alpha + + Returns: + tuple: (q, sigma, chi2ndf, alpha) + q (float): Fitted peak position + sigma (float): Fitted peak width + chi2ndf (float): Chi-square per degree of freedom + alpha (float): Fitted charge sharing probability + + Notes: + - The pedestal (ped) is fixed to 0 in this implementation. + """ + norm = np.sum(y) * (x[1] - x[0]) + + # Use _peak_position function to find initial q0 + _peaks, _ = _peak_position(x, y, thr=n_sigma*initial_sigma, ratio=ratio) + if len(_peaks) > 0: + q0 = np.min(x[_peaks]) + else: + return -1, -1, -1, -1 + + x_fit, i1, i2 = set_fit_range(x, 0.5 * q0, q0 + 2. * initial_sigma) + + y_fit = y[i1:i2] + yerr_fit = yerr[i1:i2] + + def cost_function(ped, q, sigma, alpha, norm): + # Add a small constant to yerr_fit to avoid division by zero + epsilon = 1e-10 # Small constant to prevent division by zero + safe_yerr = yerr_fit + epsilon + return np.sum( + ((y_fit - charge_sharing( + x_fit, ped, q, sigma, alpha, norm)) / safe_yerr)**2) + + m = Minuit( + cost_function, + ped=0., q=q0, sigma=initial_sigma, alpha=alpha0, norm=norm, + error_ped=0.1, error_q=0.1*q0, error_sigma=0.1*initial_sigma, + error_alpha=0.1, error_norm=0.1*norm, + fix_ped=True, + limit_q=(q0 - initial_sigma, q0 + initial_sigma), + limit_sigma=(0.1 * initial_sigma, 2. * initial_sigma), + limit_alpha=(0.01 * alpha0, 1.), + limit_norm=(np.sqrt(0.1 * norm), 3. 
* norm), + errordef=1, # for least squares cost function + print_level=0, + ) + + m.migrad() + + if m.get_fmin().is_valid: + q = m.values['q'] + sigma = m.values['sigma'] + alpha = m.values['alpha'] + chi2ndf = m.fval / (len(x_fit) - len(m.values)) + else: + q, sigma, chi2ndf, alpha = -1000, -1000, -1000, -1000 + return q, sigma, chi2ndf, alpha + + +def fit_double_charge_sharing(x, y, yerr, initial_sigma, n_sigma, ratio): + """ + Fits histogram with sum of two single photon functions with + charge sharing tail. + + Args: + x (array, float): x values + y (array, float): y values + yerr (array, float): errors of the y values + initial_sigma (float): rough estimate of peak variance + n_sigma (int): to calculate threshold of the peak finder as + thr = n_sigma * initial_sigma + ratio (float): ratio parameter of the peak finder + + Returns: + peak position, peak variance, chi2/ndf, charge sharing parameter + (all of them of the first peak) + """ + alpha0 = 0.3 + norm = np.sum(y) * (x[1] - x[0]) + + _peaks, _ = _peak_position(x, y, thr=n_sigma*initial_sigma, ratio=ratio) + if len(_peaks) > 0: + q0 = np.min(x[_peaks]) + else: + return -1, -1, -1, -1 + + q_b = 1.1 * q0 + x_fit, i1, i2 = set_fit_range(x, 0.5 * q0, q_b + initial_sigma) + y_fit = y[i1:i2] + yerr_fit = yerr[i1:i2] + + def cost_function( + ped, q_a, sigma_a, alpha, norm_a, q_b, sigma_b, norm_b + ): + # Add a small constant to yerr_fit to avoid division by zero + epsilon = 1e-10 # Small constant to prevent division by zero + safe_yerr = yerr_fit + epsilon + return np.sum(( + (y_fit - double_peak( + x_fit, ped, q_a, sigma_a, alpha, + norm_a, q_b, sigma_b, norm_b + )) / safe_yerr)**2) + + limit_alpha = (0.01, 1.) + limit_norm_a = (np.sqrt(0.1 * norm), 2. * norm) + limit_q_b = (q_b - initial_sigma, q_b + initial_sigma) + limit_sigma = (0.1 * initial_sigma, 2. 
* initial_sigma) + limit_norm_b = (np.sqrt(0.1 * 0.2 * norm), 2 * 0.2 * norm) + + m = Minuit( + cost_function, + ped=0., + q_a=q0, + sigma_a=initial_sigma, + alpha=alpha0, + norm_a=norm, + q_b=q_b, + sigma_b=initial_sigma, + norm_b=norm, + error_ped=0.1, error_q_a=0.1*q0, error_sigma_a=0.1*initial_sigma, + error_alpha=0.1, error_norm_a=0.1*norm, + error_q_b=0.1*q_b, error_sigma_b=0.1*initial_sigma, + error_norm_b=0.1*norm, + limit_q_a=(q0 - initial_sigma, q0 + initial_sigma), + limit_sigma_a=limit_sigma, + limit_alpha=limit_alpha, + limit_norm_a=limit_norm_a, + limit_q_b=limit_q_b, + limit_sigma_b=limit_sigma, + limit_norm_b=limit_norm_b, + fix_ped=True, + print_level=0, + errordef=1, # for least squares cost function + ) + + m.migrad() + + if m.get_fmin().is_valid: + q = m.values['q_a'] + sigma = m.values['sigma_a'] + alpha = m.values['alpha'] + chi2ndf = m.fval / (len(x_fit) - len(m.values)) + else: + q, sigma, chi2ndf, alpha = -1000, -1000, -1000, -1000 + return q, sigma, chi2ndf, alpha + + +def fit_gauss(bin_centers, histogram, yerr, initial_sigma, n_sigma, ratio): + """ + Fits histogram with a gaussian function + + Args: + bin_centers (array, float): bin centers along x + histogram (array, float): histogram values + yerr (array, float): errors of the y values + initial_sigma (float): rough estimate of peak variance + n_sigma (int): to calculate threshold of the peak finder as + thr = n_sigma * initial_sigma + ratio (float): ratio parameter of the peak finder + + Returns: + peak position, peak variance, chi2/ndf, charge sharing parameter + (last one always == 0) + all of them are kept for compatibility with the wrap around function + """ + norm = np.sum(histogram) * (bin_centers[1] - bin_centers[0])/np.sqrt(2.*np.pi*initial_sigma**2) + + _peaks, _ = _peak_position(bin_centers, histogram, thr=n_sigma*initial_sigma, ratio=ratio) + if len(_peaks) > 0: + q0 = np.min(bin_centers[_peaks]) + else: + return -1, -1, -1, -1 + + x_fit, i1, i2 = set_fit_range(bin_centers, q0 - initial_sigma, q0 + 2.*initial_sigma) + y_fit = histogram[i1:i2] + yerr_fit = yerr[i1:i2] + + def cost_function(amp, mean, sigma): + # Add a small constant to yerr_fit to avoid division by zero + epsilon = 1e-10 # Small constant to prevent division by zero + safe_yerr = yerr_fit + epsilon + return np.sum((( + y_fit - gauss( + x_fit, amp, mean, sigma)) / safe_yerr)**2) + + m = Minuit( + cost_function, + amp=norm, mean=q0, sigma=initial_sigma, + error_amp=0.1*norm, error_mean=0.1*initial_sigma, error_sigma=0.1*initial_sigma, + limit_amp=(0.01*norm, 2.*norm), + limit_mean=(q0-initial_sigma, q0+initial_sigma), + limit_sigma=(0.1*initial_sigma, 3.*initial_sigma), + errordef=1, # for least squares cost function + print_level=0, + ) + + m.migrad() + + if m.get_fmin().is_valid: + q = m.values['mean'] + sigma = m.values['sigma'] + alpha = 0. 
+ chi2ndf = m.fval / (len(x_fit) - len(m.values)) + else: + q, sigma, chi2ndf, alpha = -1000, -1000, -1000, -1000 + return q, sigma, chi2ndf, alpha diff --git a/src/cal_tools/step_timing.py b/src/cal_tools/step_timing.py index a68587dcbac336e9a85f7c1fafa5d44d9c823c11..1ce001c8c6cccc5965b320e11238b9362eebf494 100644 --- a/src/cal_tools/step_timing.py +++ b/src/cal_tools/step_timing.py @@ -17,12 +17,13 @@ class StepTimer: self.t0 = t self.t_latest = t - def done_step(self, step_name): + def done_step(self, step_name, print_step=True): """Record a step timing, since the last call to done_step() or start() """ t = perf_counter() self.steps[step_name].append(t - self.t_latest) - print(f"{step_name}: {t - self.t_latest:.01f} s") + if print_step: + print(f"{step_name}: {t - self.t_latest:.01f} s") self.t_latest = t def timespan(self): diff --git a/tests/test_jungfrau_ff.py b/tests/test_jungfrau_ff.py new file mode 100644 index 0000000000000000000000000000000000000000..b15db475166a211506c0a936070d16bb48256ad1 --- /dev/null +++ b/tests/test_jungfrau_ff.py @@ -0,0 +1,287 @@ +import numpy as np +import pytest +from numpy.testing import assert_array_equal, assert_array_almost_equal +from scipy.stats import norm + +from cal_tools.jungfrau.jungfrau_ff import ( + chunk_multi, + fill_histogram, + histogram_errors, + rebin_histo, + set_histo_range, + _peak_position, +) + + +def test_peak_detection_correctness(): + x = np.array([10, 20, 30, 40, 50, 60, 70, 80]) + h = np.array([0, 2, 0, 4, 0, 6, 0, 8]) + peaks, thr_idx = _peak_position(x, h) + assert peaks == [7] + assert thr_idx == 7 + + +def test_non_equal_length_inputs(): + x = np.array([10, 20, 30]) + h = np.array([1, 2]) + with pytest.raises(AttributeError): + _peak_position(x, h) + + +def test_threshold_effectiveness(): + x = np.array([10, 20, 30, 40, 50]) + h = np.array([100, 200, 50, 200, 100]) + thr = 25 # Set threshold to exclude the first two peaks + peaks, thr_idx = _peak_position(x, h, thr=thr) + assert peaks == [3] + assert thr_idx == 2 + + +def test_distance_constraint(): + x = np.array([0, 20, 40, 60, 80, 100, 120, 140, 160, 180, 200]) + h = np.array([1, 3, 1, 3, 1, 3, 1, 3, 1, 3, 1]) + dist = 20 # Minimum distance between peaks + peaks, thr_idx = _peak_position(x, h, dist=dist) + assert peaks == [9, 7, 5] + assert thr_idx == 4 + + +@pytest.fixture +def sample_chunk(): + return np.random.rand(100, 1, 256, 64).astype(np.float32) # 100 trains, 1 cell, 256x64 chunk + + +@pytest.fixture +def sample_chunk_16_cells(): + return np.random.rand(100, 16, 256, 64).astype(np.float32) # 100 trains, 16 cells, 256x64 chunk + + +def test_fill_histogram_basic(sample_chunk): + h_bins = np.linspace(0, 1000, 101, dtype=np.float32) # 100 bins + hist, edges = fill_histogram(sample_chunk, h_bins) + + assert hist.shape == (100, 1, 256, 64) + assert edges.shape == (101,) + assert np.all(hist >= 0) + assert np.sum(hist) == np.prod(sample_chunk.shape) + + +def test_fill_histogram_16_cells(sample_chunk_16_cells): + h_bins = np.linspace(0, 1000, 101, dtype=np.float32) + hist, _ = fill_histogram(sample_chunk_16_cells, h_bins) + + assert hist.shape == (100, 16, 256, 64) + assert np.sum(hist) == np.prod(sample_chunk_16_cells.shape) + + +def test_fill_histogram_single_train(): + chunk = np.random.rand(1, 1, 256, 64).astype(np.float32) + h_bins = np.linspace(0, 100, 11, dtype=np.float32) + hist, _ = fill_histogram(chunk, h_bins) + + assert hist.shape == (10, 1, 256, 64) + assert np.sum(hist) == np.prod(chunk.shape) + + +def test_fill_histogram_single_bin(): + chunk = 
diff --git a/tests/test_jungfrau_ff.py b/tests/test_jungfrau_ff.py
new file mode 100644
index 0000000000000000000000000000000000000000..b15db475166a211506c0a936070d16bb48256ad1
--- /dev/null
+++ b/tests/test_jungfrau_ff.py
@@ -0,0 +1,287 @@
+import numpy as np
+import pytest
+from numpy.testing import assert_array_equal, assert_array_almost_equal
+from scipy.stats import norm
+
+from cal_tools.jungfrau.jungfrau_ff import (
+    chunk_multi,
+    fill_histogram,
+    histogram_errors,
+    rebin_histo,
+    set_histo_range,
+    _peak_position,
+)
+
+
+def test_peak_detection_correctness():
+    x = np.array([10, 20, 30, 40, 50, 60, 70, 80])
+    h = np.array([0, 2, 0, 4, 0, 6, 0, 8])
+    peaks, thr_idx = _peak_position(x, h)
+    assert peaks == [7]
+    assert thr_idx == 7
+
+
+def test_non_equal_length_inputs():
+    x = np.array([10, 20, 30])
+    h = np.array([1, 2])
+    with pytest.raises(AttributeError):
+        _peak_position(x, h)
+
+
+def test_threshold_effectiveness():
+    x = np.array([10, 20, 30, 40, 50])
+    h = np.array([100, 200, 50, 200, 100])
+    thr = 25  # Set threshold to exclude the first two peaks
+    peaks, thr_idx = _peak_position(x, h, thr=thr)
+    assert peaks == [3]
+    assert thr_idx == 2
+
+
+def test_distance_constraint():
+    x = np.array([0, 20, 40, 60, 80, 100, 120, 140, 160, 180, 200])
+    h = np.array([1, 3, 1, 3, 1, 3, 1, 3, 1, 3, 1])
+    dist = 20  # Minimum distance between peaks
+    peaks, thr_idx = _peak_position(x, h, dist=dist)
+    assert peaks == [9, 7, 5]
+    assert thr_idx == 4
+
+
+@pytest.fixture
+def sample_chunk():
+    # 100 trains, 1 memory cell, 256x64 chunk
+    return np.random.rand(100, 1, 256, 64).astype(np.float32)
+
+
+@pytest.fixture
+def sample_chunk_16_cells():
+    # 100 trains, 16 memory cells, 256x64 chunk
+    return np.random.rand(100, 16, 256, 64).astype(np.float32)
+
+
+def test_fill_histogram_basic(sample_chunk):
+    h_bins = np.linspace(0, 1000, 101, dtype=np.float32)  # 100 bins
+    hist, edges = fill_histogram(sample_chunk, h_bins)
+
+    assert hist.shape == (100, 1, 256, 64)
+    assert edges.shape == (101,)
+    assert np.all(hist >= 0)
+    assert np.sum(hist) == np.prod(sample_chunk.shape)
+
+
+def test_fill_histogram_16_cells(sample_chunk_16_cells):
+    h_bins = np.linspace(0, 1000, 101, dtype=np.float32)
+    hist, _ = fill_histogram(sample_chunk_16_cells, h_bins)
+
+    assert hist.shape == (100, 16, 256, 64)
+    assert np.sum(hist) == np.prod(sample_chunk_16_cells.shape)
+
+
+def test_fill_histogram_single_train():
+    chunk = np.random.rand(1, 1, 256, 64).astype(np.float32)
+    h_bins = np.linspace(0, 100, 11, dtype=np.float32)
+    hist, _ = fill_histogram(chunk, h_bins)
+
+    assert hist.shape == (10, 1, 256, 64)
+    assert np.sum(hist) == np.prod(chunk.shape)
+
+
+def test_fill_histogram_single_bin():
+    chunk = np.ones((10, 1, 256, 64), dtype=np.float32)
+    h_bins = np.array([0, 2], dtype=np.float32)
+    hist, _ = fill_histogram(chunk, h_bins)
+
+    assert hist.shape == (1, 1, 256, 64)
+    assert np.all(hist == 10)
+
+
+def test_fill_histogram_float_data():
+    chunk = np.random.rand(50, 1, 256, 64).astype(np.float32)
+    h_bins = np.linspace(0, 1, 11, dtype=np.float32)
+    hist, _ = fill_histogram(chunk, h_bins)
+
+    assert hist.shape == (10, 1, 256, 64)
+    assert np.sum(hist) == np.prod(chunk.shape)
+
+
+def test_fill_histogram_out_of_range():
+    chunk = np.random.rand(100, 1, 256, 64).astype(np.float32)
+    h_bins = np.linspace(0, 100, 11, dtype=np.float32)
+    hist, _ = fill_histogram(chunk, h_bins)
+
+    assert hist.shape == (10, 1, 256, 64)
+    assert np.sum(hist) <= np.prod(chunk.shape)
+
+
+@pytest.mark.parametrize("shape", [
+    (100, 1, 256, 64),
+    (50, 16, 256, 64),
+    (200, 1, 128, 32)
+])
+def test_fill_histogram_various_shapes(shape):
+    chunk = np.random.rand(*shape).astype(np.float32)
+    h_bins = np.linspace(0, 1000, 101, dtype=np.float32)
+    hist, _ = fill_histogram(chunk, h_bins)
+
+    assert hist.shape == (100, *shape[1:])
+    assert np.sum(hist) == np.prod(shape)
+
+
+@pytest.fixture
+def sample_data():
+    # 100 trains, 1 memory cell, 1024x512 image
+    return np.random.rand(100, 1, 1024, 512)
+
+
+@pytest.fixture
+def sample_data_16_cells():
+    # 100 trains, 16 memory cells, 1024x512 image
+    return np.random.rand(100, 16, 1024, 512)
+
+
+def test_chunk_multi_basic(sample_data):
+    block_size = [256, 64]
+    chunks = chunk_multi(sample_data, block_size)
+    assert len(chunks) == 32  # (1024 // 256) * (512 // 64) = 4 * 8 = 32
+    assert chunks[0].shape == (100, 1, 256, 64)
+
+
+def test_chunk_multi_16_cells(sample_data_16_cells):
+    block_size = [256, 64]
+    chunks = chunk_multi(sample_data_16_cells, block_size)
+    assert len(chunks) == 32
+    assert chunks[0].shape == (100, 16, 256, 64)
+
+
+def test_chunk_multi_uneven():
+    data = np.random.rand(50, 1, 1000, 500)  # Uneven dimensions
+    block_size = [256, 64]
+    chunks = chunk_multi(data, block_size)
+    assert chunks[-1].shape == (50, 1, 232, 52)  # Last chunk should be smaller
+
+
+def test_chunk_multi_single_train():
+    data = np.random.rand(1, 1, 1024, 512)
+    block_size = [256, 64]
+    chunks = chunk_multi(data, block_size)
+    assert len(chunks) == 32
+    assert chunks[0].shape == (1, 1, 256, 64)
+
+
+def test_chunk_multi_small_block():
+    data = np.random.rand(10, 1, 1024, 512)
+    block_size = [128, 32]
+    chunks = chunk_multi(data, block_size)
+    assert len(chunks) == 128  # (1024 // 128) * (512 // 32) = 8 * 16 = 128
+    assert chunks[0].shape == (10, 1, 128, 32)
+
+
+@pytest.mark.parametrize("invalid_input", [
+    (np.random.rand(1024), [256]),  # 1D array
+    (np.random.rand(100, 1, 1024, 512), [0, 64]),  # Invalid block size
+    (np.random.rand(100, 1, 1024, 512), [256, 0]),  # Invalid block size
+    (np.random.rand(100, 1, 1024, 512), [1025, 64]),  # Block size larger than input
+    (np.random.rand(100, 1, 1024, 512), [256]),  # Incomplete block size
+    ([1, 2, 3, 4], [256, 64]),  # Not a numpy array
+])
+def test_chunk_multi_invalid_input(invalid_input):
+    with pytest.raises(ValueError):
+        chunk_multi(*invalid_input)
+
+
+def test_chunk_multi_empty_array():
+    data = np.array([])
+    block_size = [256, 64]
+    with pytest.raises(ValueError):
+        chunk_multi(data, block_size)
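The tiling arithmetic these tests rely on is worth restating: a 1024x512 module cut into 256x64 blocks yields 4 * 8 = 32 chunks, and the leading train and cell axes pass through untouched. A short sketch (not part of the patch) asserting exactly what the tests above and below check:

```python
import numpy as np

from cal_tools.jungfrau.jungfrau_ff import chunk_multi

data = np.random.rand(10, 1, 1024, 512)  # trains, cells, rows, columns
chunks = chunk_multi(data, [256, 64])

assert len(chunks) == (1024 // 256) * (512 // 64)   # 4 * 8 = 32 tiles
assert all(c.shape[:2] == (10, 1) for c in chunks)  # leading axes preserved
```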
+
+
+def test_chunk_multi_large_array():
+    data = np.random.rand(1000, 1, 1024, 512)  # 1000 trains
+    block_size = [256, 64]
+    chunks = chunk_multi(data, block_size)
+    assert len(chunks) == 32
+    assert chunks[0].shape == (1000, 1, 256, 64)
+
+
+def test_histogram_errors():
+    # Test case 1: Array with positive, negative, and zero values
+    input_array = np.array([4, -9, 0, 16, -25, 1])
+    expected_output = np.array([2, 3, 1, 4, 5, 1])
+    assert_array_equal(histogram_errors(input_array), expected_output)
+
+    # Test case 2: Array with all positive values
+    input_array = np.array([1, 4, 9, 16])
+    expected_output = np.array([1, 2, 3, 4])
+    assert_array_equal(histogram_errors(input_array), expected_output)
+
+    # Test case 3: Array with all negative values
+    input_array = np.array([-1, -4, -9, -16])
+    expected_output = np.array([1, 2, 3, 4])
+    assert_array_equal(histogram_errors(input_array), expected_output)
+
+    # Test case 4: Array with all zeros
+    input_array = np.zeros(5)
+    expected_output = np.ones(5)
+    assert_array_equal(histogram_errors(input_array), expected_output)
+
+    # Test case 5: Empty array
+    input_array = np.array([])
+    expected_output = np.array([])
+    assert_array_equal(histogram_errors(input_array), expected_output)
+
+    # Test case 6: Large values
+    input_array = np.array([1e6, -1e6])
+    expected_output = np.array([1e3, 1e3])
+    assert_array_almost_equal(histogram_errors(input_array), expected_output)
+
+    # Test case 7: Small values
+    input_array = np.array([1e-6, -1e-6])
+    expected_output = np.array([1e-3, 1e-3])
+    assert_array_almost_equal(histogram_errors(input_array), expected_output)
+
+
+def test_rebin_histo():
+    """
+    Test function for rebin_histo.
+    """
+    # Test case 1: Successful rebinning with factor 2
+    hist = np.array([1, 2, 3, 4, 5, 6])
+    bin_edges = np.array([0, 1, 2, 3, 4, 5, 6])
+    rebin_factor = 2
+    expected_hist = np.array([3, 7, 11])
+    expected_bin_edges = np.array([0, 2, 4, 6])
+    h_out, x_out = rebin_histo(hist, bin_edges, rebin_factor)
+    assert_array_equal(h_out, expected_hist)
+    assert_array_equal(x_out, expected_bin_edges)
+
+    # Test case 2: Rebinning with factor 3
+    hist = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9])
+    bin_edges = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
+    rebin_factor = 3
+    expected_hist = np.array([6, 15, 24])
+    expected_bin_edges = np.array([0, 3, 6, 9])
+    h_out, x_out = rebin_histo(hist, bin_edges, rebin_factor)
+    assert_array_equal(h_out, expected_hist)
+    assert_array_equal(x_out, expected_bin_edges)
+
+    # Test case 3: Error case - rebin_factor is not an exact divisor
+    # of the number of bins
+    hist = np.array([1, 2, 3, 4, 5])
+    bin_edges = np.array([0, 1, 2, 3, 4, 5])
+    rebin_factor = 2
+    with pytest.raises(ValueError):
+        rebin_histo(hist, bin_edges, rebin_factor)
+
+
+def test_set_histo_range_gaussian():
+    # Generate a Gaussian-like histogram
+    x = np.linspace(-5, 5, 1000)
+    h = norm.pdf(x, loc=0, scale=1)
+    h_range = (-2, 2)
+
+    # Apply set_histo_range
+    new_x, new_h = set_histo_range(x, h, h_range)
+
+    # Check that the new range lies within the specified range
+    assert new_x[0] >= h_range[0] and new_x[-1] <= h_range[1]
+
+    # Check that the peak is preserved (should be near 0 for this Gaussian)
+    assert abs(x[np.argmax(h)] - new_x[np.argmax(new_h)]) < 0.1
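Taken together, the helpers tested above form the preprocessing applied to each pixel's spectrum before fitting. One plausible chaining (not part of the patch), reusing the values from `test_rebin_histo`:

```python
import numpy as np

from cal_tools.jungfrau.jungfrau_ff import histogram_errors, rebin_histo

# Six 1-wide bins combined pairwise, as in test_rebin_histo.
hist = np.array([1, 2, 3, 4, 5, 6])
bin_edges = np.array([0, 1, 2, 3, 4, 5, 6])

hist2, edges2 = rebin_histo(hist, bin_edges, 2)  # -> [3, 7, 11], [0, 2, 4, 6]
yerr = histogram_errors(hist2)                   # sqrt of |counts|, 1 where count == 0
print(hist2, edges2, yerr)
```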