Commit f740e137 authored by Karim Ahmed
fix: put back np.histogram function. plus some refactors and fixes

parent 078673bc
Merge request !1055: [Jungfrau][FF] Improve Fitting performance and stop using pydetlib + many refactors.
%% Cell type:markdown id: tags:
# Jungfrau Detector Flatfield Gain Calibration - Part 1: Histogram Creation and Fitting
Author: European XFEL Detector Group, Version: 1.0
This notebook performs the first part of the Jungfrau detector flatfield gain calibration process.
#### Overview
1. Load Raw Data: Read detector data for the specified run (currently only a single run is supported)
2. Initialize Histograms: Create empty histograms for each module and memory cell
3. Offset Correction: Apply offset subtraction to raw data
4. Histogram Population: Fill histograms with offset-corrected data
5. Histogram Fitting: Apply specified function (Charge Sharing, Double Charge Sharing, or Gaussian)
6. Save Intermediate Results:
- Store histogram data as HDF5 files
- Save fit results (gain map, sigma map, chi-square map, alpha map) as HDF5 files
Note: Results from this notebook are used later for Gain Map and BadPixelsFF Generation.
%% Cell type:code id: tags:
``` python
in_folder = '/gpfs/exfel/exp/SPB/202330/p900343/raw' # RAW data path, required
out_folder = '/gpfs/exfel/data/scratch/ahmedk/test/remove/JF4M_SPB_gain/r66/no_offset' # Output path for gain data, required
metadata_folder = '.' # Directory containing calibration_metadata.yml when run by xfel-calibrate
runs = [66] # can be a list of runs
sensor_size = [512, 1024] # size of the array in the 'row' and 'col' dimensions
chunked_trains = 1000 # Number of trains per chunk.
gains = [0, 1, 2] # gain bit values
karabo_id = "SPB_IRDA_JF4M" # karabo prefix of Jungfrau devices
karabo_da = [""] # Detector's data aggregators. Leave empty to derive from CALCAT.
creation_time = '' # To overwrite the measured creation_time. Required Format: YYYY-MM-DD HR:MN:SC e.g. "2022-06-28 13:00:00"
# Parameter conditions
bias_voltage = -1 # bias voltage - Set to -1 to derive from CONTROL file.
integration_time = -1. # integration time - Set to -1 to derive from CONTROL file.
gain_mode = -1 # gain mode - Set to -1 to derive from CONTROL file.
gain_setting = -1 # gain setting parameter conditions (High or low CDS) - Set to -1 to derive from CONTROL file.
memory_cells = -1 # number of memory cells - Set to -1 to derive from CONTROL file.
sc_start = -1 # storage cell start value - should be derived from CONTROL file.
h_bins_s = 2 # bin width in ADC units of the histogram
h_range = [-50, 450.] # histogram range in ADC units
block_size = [256, 64] # dimension of the chunks in 'row' and 'col'
control_src_template = '{}/DET/CONTROL'
extra_dims = ['cell', 'row', 'col'] # labels for the DataArray dims after the first
fit_func = 'CHARGE_SHARING' # function used to fit the single photon peak. The 3 available options are: CHARGE_SHARING, CHARGE_SHARING_2, and GAUSS
fit_h_range = [150, 350.] # range of the histogram in x-axis units used in fitting
# parameters for the peak finder
n_sigma = 5. # n of sigma above pedestal threshold
ratio = 0.99 # ratio of the next peak amplitude in the peak_finder
rebin = 1 # rebinning factor applied to the histogram before fitting
def find_das(in_folder, runs, karabo_da):
    run = runs[0]
    if (karabo_da is not None) and karabo_da != [""]:
        return karabo_da
    from pathlib import Path
    import re
    karabo_da = set()
    for file in Path(in_folder, f'r{run:04d}').iterdir():
        da = re.search(r'JNGFR\d{2}', file.name)
        if da:
            karabo_da.add(da.group())
    return sorted(karabo_da)
```
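%% Cell type:markdown id: tags:
The cell below is a small illustration (not part of the calibration run) of how `find_das` derives the data aggregator names from RAW file names; the file names used here are hypothetical examples of the usual `JNGFRxx` naming.
%% Cell type:code id: tags:
``` python
# Illustrative only: the JNGFRxx token in a RAW file name identifies the
# data aggregator of each module.
import re

# Hypothetical file names as they could appear in in_folder/r0066/
example_files = [
    "RAW-R0066-JNGFR01-S00000.h5",
    "RAW-R0066-JNGFR01-S00001.h5",
    "RAW-R0066-JNGFR02-S00000.h5",
]
found = sorted({m.group() for f in example_files if (m := re.search(r'JNGFR\d{2}', f))})
print(found)  # ['JNGFR01', 'JNGFR02']
```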
%% Cell type:code id: tags:
``` python
import multiprocessing
import time
from functools import partial
from logging import warning
from pathlib import Path
import numpy as np
import pasha as psh
from extra_data import RunDirectory
from h5py import File as h5file
from tqdm import tqdm
import cal_tools.restful_config as rest_cfg
from cal_tools.calcat_interface import JUNGFRAU_CalibrationData
from cal_tools.jungfrau import jungfrau_ff
from cal_tools.jungfrau.jungfraulib import JungfrauCtrl
from cal_tools.step_timing import StepTimer
from cal_tools.tools import calcat_creation_time
```
%% Cell type:code id: tags:
``` python
proposal = list(filter(None, in_folder.strip('/').split('/')))[-2]
in_folder = Path(in_folder)
out_folder = Path(out_folder)
out_folder.mkdir(exist_ok=True, parents=True)
print(f'Creating directory {out_folder}')
```
%% Cell type:code id: tags:
``` python
begin_stuff = time.localtime()
control_src = control_src_template.format(karabo_id, karabo_da[0])
first_run_folder = in_folder / f'r{runs[0]:04d}'
ctrl_data = JungfrauCtrl(RunDirectory(first_run_folder), control_src)
# Run's creation time:
creation_time = calcat_creation_time(in_folder, runs[0], creation_time)
print(f"Creation time: {creation_time}")
print("Chunked trains: ", chunked_trains)
```
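%% Cell type:markdown id: tags:
Note that `control_src_template` contains a single `{}` placeholder, so the second argument passed to `format` above is simply ignored. A quick sanity check with illustrative values:
%% Cell type:code id: tags:
``` python
# str.format ignores surplus positional arguments, so passing karabo_da[0]
# to a one-placeholder template is harmless.
assert '{}/DET/CONTROL'.format("SPB_IRDA_JF4M", "JNGFR01") == "SPB_IRDA_JF4M/DET/CONTROL"
```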
%% Cell type:code id: tags:
``` python
def read_detector_conditions(
    run_dc,
    run_number,
    bias_voltage,
    memory_cells,
    integration_time,
    gain_mode,
    gain_setting,
):
    """Get parameter conditions for a run.
    Args:
        run_dc(extra_data.DataCollection): Data collection of the run.
        run_number(int): Number of the run.
        bias_voltage(float): Bias voltage; -1 to read it from CONTROL data.
        memory_cells(int): Number of memory cells; -1 to read it from CONTROL data.
        integration_time(float): Integration time; -1 to read it from CONTROL data.
        gain_mode(int): Gain mode; -1 to read it from CONTROL data.
        gain_setting(int): Gain setting; -1 to read it from CONTROL data.
    Returns:
        float, int, int, float, int, int, int
    """

    ## Retrieve DB constants
    ctrl_data = JungfrauCtrl(run_dc, control_src)

    # Read the storage cell start unconditionally so that `sc_start`
    # is always defined, even when `memory_cells` is given explicitly.
    run_mcells, sc_start = ctrl_data.get_memory_cells()
    if memory_cells < 0:
        memory_cells = run_mcells

    if integration_time < 0:
        integration_time = ctrl_data.get_integration_time()

    if bias_voltage < 0:
        bias_voltage = ctrl_data.get_bias_voltage()

    if gain_setting < 0:
        gain_setting = ctrl_data.get_gain_setting()

    if gain_mode < 0:
        gain_mode = ctrl_data.get_gain_mode()

    if memory_cells == 16 and gain_mode == 0:
        warning(
            "Data is in burst mode and dynamic gain. Forcing gain mode to fixed gain.")
        gain_mode = 1

    return (
        bias_voltage,
        memory_cells,
        sc_start,
        integration_time,
        gain_mode,
        gain_setting,
        run_number,
    )
condition_lists = {
    "integration_time": [],
    "memory_cells": [],
    "sc_start": [],
    "bias_voltage": [],
    "gain_setting": [],
    "gain_mode": [],
    "runs": [],
}
for run in runs:
    voltage, cells, sc_start, integration, mode, setting, run_number = read_detector_conditions(  # noqa
        RunDirectory(in_folder / f"r{run:04d}"),
        run,
        bias_voltage=bias_voltage,
        memory_cells=memory_cells,
        integration_time=integration_time,
        gain_mode=gain_mode,
        gain_setting=gain_setting)

    condition_lists["runs"].append(run_number)
    condition_lists["bias_voltage"].append(voltage)
    condition_lists["memory_cells"].append(cells)
    condition_lists["integration_time"].append(integration)
    condition_lists["gain_mode"].append(mode)
    condition_lists["gain_setting"].append(setting)
    condition_lists["sc_start"].append(sc_start)
# Validate run conditions
runs_results = condition_lists["runs"]
for c, lst in condition_lists.items():
    if c == "runs":
        continue
    if not all(val == lst[0] for val in lst):
        warning(
            f"{c} is not the same for all runs: {lst} "
            f"for runs {runs_results}, respectively"
        )
memory_cells = condition_lists["memory_cells"][0]
sc_start = condition_lists["sc_start"][0]
print(f"Memory cells: {memory_cells:d}")
bias_voltage = condition_lists["bias_voltage"][0]
print(f"Bias Voltage: {bias_voltage:3.1f} V")
integration_time = condition_lists["integration_time"][0]
print(f"Exposure Time: {integration_time:1.2f} us")
gain_mode = condition_lists["gain_mode"][0]
print(f"Gain mode: {gain_mode}")
gain_setting = condition_lists["gain_setting"][0]
print(f"Gain setting: {gain_setting}")
```
%% Cell type:code id: tags:
``` python
# Retrieve calibration constants
jf_cal = JUNGFRAU_CalibrationData(
    detector_name=karabo_id,
    sensor_bias_voltage=bias_voltage,
    event_at=creation_time,
    modules=karabo_da,
    memory_cells=memory_cells,
    integration_time=integration_time,
    gain_setting=gain_setting,
    # Retrieve fixed gain constants for burst mode.
    # Dark MDL pushes fixed gain constants for burst mode.
    gain_mode=gain_mode,
    client=rest_cfg.calibration_client(),
)

jf_metadata = jf_cal.metadata(
    calibrations=["Offset10Hz", "Noise10Hz"])

# TODO: display CCV timestamp
const_data = jf_cal.ndarray_map(metadata=jf_metadata)
jf_cal.display_markdown_retrieved_constants(metadata=jf_metadata)
for mod in karabo_da:
    constants_data = const_data[mod]

    if "Offset10Hz" in constants_data:
        # From (512, 1024, 1, 3) to (3, 1, 512, 1024)
        constants_data["Offset10Hz"] = np.transpose(
            np.swapaxes(constants_data["Offset10Hz"], 0, 1)).astype(np.float32)
    else:
        raise ValueError(f"Offset constant is not found for {mod}.")

    if "Noise10Hz" in constants_data:
        # From (512, 1024, 1, 3) to (3, 1, 512, 1024)
        constants_data["Noise10Hz"] = np.transpose(
            np.swapaxes(constants_data["Noise10Hz"], 0, 1)).astype(np.float32)
    else:
        constants_data["Noise10Hz"] = None
        warning(f"Noise constant is not found for {mod}")
```
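%% Cell type:markdown id: tags:
A quick shape check of the axis manipulation above (illustrative, with a dummy array): `np.swapaxes(..., 0, 1)` turns `(rows, cols, cells, gains)` into `(cols, rows, cells, gains)`, and `np.transpose` without an axes argument then reverses all dimensions, giving `(gains, cells, rows, cols)`.
%% Cell type:code id: tags:
``` python
# Dummy constant with the stored layout (rows, cols, cells, gains).
dummy = np.zeros((512, 1024, 1, 3), dtype=np.float32)
reordered = np.transpose(np.swapaxes(dummy, 0, 1))
print(reordered.shape)  # (3, 1, 512, 1024) -> (gains, cells, rows, cols)
```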
%% Cell type:markdown id: tags:
## Filling the histogram
* first it retrieves DB offsets for a given run
* on a file-by-file basis it performs offset subtraction and fills the histogram
%% Cell type:markdown id: tags:
### Creating empty histogram
%% Cell type:code id: tags:
``` python
# +1 so that the edges span the full range with bins exactly h_bins_s wide.
h_n_bins = int((h_range[1] - h_range[0])/h_bins_s) + 1
h_bins = np.linspace(h_range[0], h_range[1], h_n_bins, dtype=np.float32)

h_spectra = dict()
edges = dict()
for da in karabo_da:
    h_spectra[da] = np.zeros(
        (h_n_bins-1, memory_cells, sensor_size[0], sensor_size[1]),
        dtype=np.int32,
    )
    edges[da] = None
```
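%% Cell type:markdown id: tags:
With the defaults (`h_range = [-50, 450.]`, `h_bins_s = 2`) this yields 251 edges, i.e. 250 bins of exactly 2 ADC units each; a minimal check:
%% Cell type:code id: tags:
``` python
# Sanity check of the binning: every bin should be h_bins_s wide.
print(h_bins.shape)          # (251,) edges -> 250 bins
print(np.diff(h_bins)[:3])   # [2. 2. 2.]
assert np.allclose(np.diff(h_bins), h_bins_s)
```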
%% Cell type:code id: tags:
``` python
def correct_train(wid, index, d):
    """Offset correct per train."""
    g = gain[index].values
    # Jungfrau gains 0[00], 1[01], 3[11]
    # Change low gain to 2 for indexing purposes.
    g[g==3] = 2

    # Select memory cells
    if memory_cells > 1:
        """
        Even though it is correct to assume that the memory cell pattern
        is the same across all trains (for one correction run taken with
        one acquisition), it is preferred not to assume this, to account
        for exceptions that can happen.
        """
        m = memcells[index].copy()
        # 255 is a cell value pointing to no cell image data (image of 0 pixels).
        # The corresponding image will be corrected with the constant of cell 0,
        # to avoid values of 0. This relies on not storing the modified memory
        # cells in the corrected data.
        m[m==255] = 0
        offset_map_cell = offset_map[:, m, ...]  # [3, cells, x, y]
    else:
        # Single-cell data: define `m` so the sorting below is well defined.
        m = np.zeros(1, dtype=np.int16)
        offset_map_cell = offset_map[:, 0, ...]

    # Offset correction
    offset = np.choose(g, offset_map_cell)
    d -= offset

    # Sort corrected data based on memory cell order.
    sort_indices = np.argsort(m)
    data_corr[index, ...] = d[sort_indices, ...]
```
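%% Cell type:markdown id: tags:
The per-pixel gain selection above relies on `np.choose`: the gain map `g` holds values 0-2 and picks, pixel by pixel, from the three gain planes of the offset map. A minimal sketch with toy shapes:
%% Cell type:code id: tags:
``` python
# Toy example: 3 gain planes of shape (1 cell, 2, 2) and a per-pixel gain map.
toy_offsets = np.stack([np.full((1, 2, 2), v, dtype=np.float32) for v in (10., 20., 30.)])
toy_gain = np.array([[[0, 1], [2, 0]]])  # shape (1, 2, 2), values in {0, 1, 2}
print(np.choose(toy_gain, toy_offsets))
# [[[10. 20.]
#   [30. 10.]]]
```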
%% Cell type:code id: tags:
``` python
step_timer = StepTimer()
n_cpus = multiprocessing.cpu_count()
context = psh.context.ProcessContext(num_workers=n_cpus)
print(f"Using {n_cpus} workers for correction.")
```
%% Cell type:code id: tags:
``` python
for run in runs:
    for da in karabo_da:
        det_src = f'{karabo_id}/DET/{da}:daqOutput'
        offset_map = const_data[da]["Offset10Hz"]
        run_folder = in_folder / f'r{run:04d}'

        ## Offset subtraction & Histogram filling
        ### performs offset subtraction and fills the histogram
        ### processing in chunks of trains because single photon runs can be very large
        chunks_list = list(RunDirectory(
            run_folder, include=f"*{da}*").split_trains(trains_per_part=chunked_trains))
        print(f"Processing raw data and filling histogram in {len(chunks_list)} chunks")

        for dc_chunk in chunks_list:
            memcells = dc_chunk.get_array(
                det_src,
                'data.memoryCell',
                extra_dims=extra_dims[0:1]).astype(np.int16)

            adc = dc_chunk.get_array(det_src, 'data.adc', extra_dims=extra_dims).astype(np.float32)
            gain = dc_chunk.get_array(det_src, 'data.gain', extra_dims=extra_dims)
            # gain = gain.where(gain < 2, other = 2).astype(np.int16)

            step_timer.start()
            # Allocate shared arrays for corrected data. Used in `correct_train()`
            data_corr = context.alloc(shape=adc.shape, dtype=np.float32)
            context.map(correct_train, adc)
            step_timer.done_step("correct_train")

            step_timer.start()
            chunks = jungfrau_ff.chunk_multi(data_corr, block_size)
            ch_inp = [(c, h_bins) for c in chunks]
            with multiprocessing.Pool(processes=min(n_cpus, len(chunks))) as pool:
                results = pool.starmap(jungfrau_ff.fill_histogram, ch_inp)

            for i, (h_chk, e_chk) in enumerate(results):
                if edges[da] is None:
                    edges[da] = e_chk

                n_blocks_col = int(h_spectra[da].shape[-1]/block_size[1])
                irow = int(np.floor(i/n_blocks_col)) * block_size[0]
                icol = i%n_blocks_col * block_size[1]
                h_spectra[da][..., irow:irow+block_size[0], icol:icol+block_size[1]] += h_chk

            step_timer.done_step("Histogram created")
```
%% Cell type:code id: tags:
``` python
print(f"Total processing time {step_timer.timespan():.01f} s")
step_timer.print_summary()
```
%% Cell type:markdown id: tags:
### Saving histogram
%% Cell type:code id: tags:
``` python
for da in karabo_da:
    fout_h_path = out_folder / f"R{runs[0]:04d}_{proposal.upper()}_Gain_Spectra_{da}_Histo.h5"
    hists = h_spectra[da]
    with h5file(fout_h_path, 'w') as fout_h:
        print(f"Saving histograms at {fout_h_path}.")
        dset_h = fout_h.create_dataset("histos", data=hists)
        dset_e = fout_h.create_dataset("edges", data=edges[da])
        dset_noi = fout_h.create_dataset(
            "noise_map",
            data=const_data[da]["Noise10Hz"])

        fout_h.attrs["memory_cells"] = memory_cells
        fout_h.attrs["gain_setting"] = gain_setting
        fout_h.attrs["gain_mode"] = gain_mode
        fout_h.attrs["integration_time"] = integration_time
        fout_h.attrs["bias_voltage"] = bias_voltage
        fout_h.attrs["creation_time"] = str(creation_time)
```
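%% Cell type:markdown id: tags:
A sketch of how the saved file can be read back later (e.g. by the gain-map notebook); the dataset and attribute names match what is written above:
%% Cell type:code id: tags:
``` python
# Illustrative read-back of one module's histogram file.
da = karabo_da[0]
fpath = out_folder / f"R{runs[0]:04d}_{proposal.upper()}_Gain_Spectra_{da}_Histo.h5"
with h5file(fpath, 'r') as f:
    histos = f["histos"][()]     # (bins, memory_cells, rows, cols)
    bin_edges = f["edges"][()]
    print(histos.shape, bin_edges.shape, dict(f.attrs))
```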
%% Cell type:code id: tags:
``` python
def create_and_fill_map(r_maps, shape, dtype=np.float32):
    g0_map = np.zeros(shape, dtype=dtype)
    sigma_map = np.zeros(shape, dtype=dtype)
    chi2ndf_map = np.zeros(shape, dtype=dtype)
    alpha_map = np.zeros(shape, dtype=dtype)
    n_blocks_col = int(shape[-1] / block_size[1])

    for i, (g0_chk, sigma_chk, chi2ndf_chk, alpha_chk) in tqdm(enumerate(r_maps)):
        irow = int(np.floor(i / n_blocks_col)) * block_size[0]
        icol = i % n_blocks_col * block_size[1]
        slice_obj = (
            ...,
            slice(irow, irow + block_size[0]),
            slice(icol, icol + block_size[1]),
        )
        g0_map[slice_obj] = g0_chk
        sigma_map[slice_obj] = sigma_chk
        chi2ndf_map[slice_obj] = chi2ndf_chk
        alpha_map[slice_obj] = alpha_chk

    return g0_map, sigma_map, chi2ndf_map, alpha_map
```
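%% Cell type:markdown id: tags:
The index arithmetic above assumes the chunks arrive in row-major block order, so chunk `i` maps back to sensor coordinates as `irow = (i // n_blocks_col) * block_size[0]` and `icol = (i % n_blocks_col) * block_size[1]`. A quick worked example with the default `block_size = [256, 64]` and a 1024-column sensor (16 blocks per block row):
%% Cell type:code id: tags:
``` python
# Worked example of the chunk-index arithmetic used in create_and_fill_map.
n_blocks_col = 1024 // 64  # 16 blocks across the columns
for i in [0, 1, 15, 16, 17]:
    irow = (i // n_blocks_col) * 256
    icol = (i % n_blocks_col) * 64
    print(f"chunk {i:2d} -> rows {irow}:{irow+256}, cols {icol}:{icol+64}")
# chunk  0 -> rows 0:256, cols 0:64
# chunk 16 -> rows 256:512, cols 0:64
```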
%% Cell type:markdown id: tags:
## Fitting histograms
%% Cell type:code id: tags:
``` python
fout_temp = f"R{runs[0]:04d}_{proposal.upper()}_Gain_Spectra_{{}}_{fit_func}_Fit.h5"
for da in karabo_da:
_edges = np.array(edges[da])
x = (_edges[1:] + _edges[:-1]) / 2.0
x, _h_spectra = jungfrau_ff.set_histo_range(x, h_spectra[da], fit_h_range)
print(f'adjusting histogram range: {x[0]} - {x[-1]}')
print(f'Histo shape updated from {h_spectra[da].shape} to {_h_spectra.shape}')
print(f'x-axis: {x.shape}')
chunks = jungfrau_ff.chunk_multi(_h_spectra, block_size)
pool = multiprocessing.Pool()
partial_fit = partial(
jungfrau_ff.fit_histogram,
x,
fit_func,
n_sigma,
rebin,
ratio,
const_data[da]["Noise10Hz"],
)
print("starting spectra fit")
step_timer.start()
with multiprocessing.Pool() as pool:
r_maps = pool.map(partial_fit, chunks)
step_timer.done_step("r_maps calculation")
step_timer.start()
g0_map = np.zeros((memory_cells, sensor_size[0], sensor_size[1]), dtype=np.float32)
sigma_map = np.zeros((memory_cells, sensor_size[0], sensor_size[1]), dtype=np.float32)
chi2ndf_map = np.zeros((memory_cells, sensor_size[0], sensor_size[1]), dtype=np.float32)
alpha_map = np.zeros((memory_cells, sensor_size[0], sensor_size[1]), dtype=np.float32)
for i, r in enumerate(r_maps):
g0_chk, sigma_chk, chi2ndf_chk, alpha_chk = r
n_blocks_col = int(g0_map.shape[-1] / block_size[1])
irow = int(np.floor(i / n_blocks_col)) * block_size[0]
icol = i % n_blocks_col * block_size[1]
g0_map[..., irow : irow + block_size[0], icol : icol + block_size[1]] = g0_chk
sigma_map[..., irow : irow + block_size[0], icol : icol + block_size[1]] = sigma_chk
chi2ndf_map[
..., irow : irow + block_size[0], icol : icol + block_size[1]
] = chi2ndf_chk
alpha_map[..., irow : irow + block_size[0], icol : icol + block_size[1]] = alpha_chk
map_shape = (memory_cells, sensor_size[0], sensor_size[1])
g0_map, sigma_map, chi2ndf_map, alpha_map = create_and_fill_map(r_maps, map_shape)
step_timer.done_step("Finished fitting")
fout_path = f"{out_folder}/{fout_temp.format(da)}"
step_timer.start()
fout_path = out_folder / fout_temp.format(da)
with h5file(fout_path, "w") as fout:
dset_chi2 = fout.create_dataset("chi2map", data=np.transpose(chi2ndf_map))
dset_gmap_fit = fout.create_dataset("gainMap_fit", data=np.transpose(g0_map))
dset_std = fout.create_dataset("sigmamap", data=np.transpose(sigma_map))
dset_alpha = fout.create_dataset("alphamap", data=np.transpose(alpha_map))
dset_noi = fout.create_dataset(
"noise_map",
data=const_data[da]["Noise10Hz"])
dset_noi = fout.create_dataset("noise_map", data=const_data[da]["Noise10Hz"])
fout.attrs["memory_cells"] = memory_cells
fout.attrs["integration_time"] = integration_time
fout.attrs["bias_voltage"] = bias_voltage
fout.attrs["gain_setting"] = gain_setting
fout.attrs["gain_mode"] = gain_mode
fout.attrs["creation_time"] = str(creation_time)
fout.attrs["karabo_id"] = karabo_id
step_timer.done_step("Saved fit results")
```
cal_tools/jungfrau/jungfrau_ff.py
@@ -56,38 +56,41 @@ def chunk_multi(data, block_size):
    return chunks
def fill_histogram(imgs, h_bins):
    """
    Fills a histogram with shape
    (n_bins-1, memory_cells, n_rows, n_cols)
    from input images.

    Args:
        h_bins (list, float): the binning of the x-axis
        imgs (np.ndarray): image data to histogram
            (trains, memory_cells, row, col).

    Returns: histogram bin counts, bin edges.
    """
    if not isinstance(imgs, np.ndarray):
        raise TypeError("Expected imgs numpy ndarray type.")

    if imgs.ndim < 4:
        raise ValueError("Expected 4D imgs array.")

    n_cells, n_rows, n_cols = imgs.shape[1:]

    h_chk = np.zeros(
        (len(h_bins)-1, n_cells, n_rows, n_cols),
        dtype=np.int32)

    e_chk = None
    for cell in range(n_cells):
        for row in range(n_rows):
            for col in range(n_cols):
                this_pix = imgs[:, cell, row, col]
                _h, _e = np.histogram(this_pix, bins=h_bins)
                h_chk[..., cell, row, col] += _h
                if e_chk is None:
                    e_chk = np.array(_e)
    return h_chk, e_chk
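# Editor's note: a minimal vectorized sketch of the same histogramming
# (illustrative, not part of this module). It replaces the per-pixel
# np.histogram calls with one np.bincount over a joint (pixel, bin) index,
# assuming sorted 1D bin edges as produced by np.linspace, and reproducing
# np.histogram's convention that the rightmost edge is inclusive.
def fill_histogram_vectorized(imgs, h_bins):
    n_bins = len(h_bins) - 1
    flat = imgs.reshape(imgs.shape[0], -1)            # (trains, pixels)
    idx = np.searchsorted(h_bins, flat, side='right') - 1
    idx[flat == h_bins[-1]] = n_bins - 1              # rightmost edge inclusive
    valid = (idx >= 0) & (idx < n_bins)               # drop out-of-range values
    pix = np.broadcast_to(np.arange(flat.shape[1]), flat.shape)
    combined = pix[valid] * n_bins + idx[valid]       # joint (pixel, bin) index
    counts = np.bincount(combined, minlength=flat.shape[1] * n_bins)
    h_chk = counts.reshape(flat.shape[1], n_bins).T.reshape(
        n_bins, *imgs.shape[1:]).astype(np.int32)
    return h_chk, np.array(h_bins)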
# peak finder to find the first photon peak and/or the pedestal peak
@@ -391,23 +394,25 @@ def rebin_histo(h, x, r):
    return h_out, x_out
def fit_histogram(x, _fit_func, n_sigma, rebin, ratio, noise, histo):
    """
    Wrap-around function for fitting of histogram

    Args:
        x (list, float): bin centers along x
        _fit_func (string): which function to use for fit
            - CHARGE_SHARING: single peak with charge sharing tail
            - CHARGE_SHARING_2: sum of two CHARGE_SHARING
            - GAUSS: gaussian function
        n_sigma (int): to calculate threshold of the peak finder as
            thr = n_sigma * sigma0
        ratio (float): ratio parameter of the peak finder
        histo (ndarray): histogram bin counts with shape
            (n_bins, memory_cells, row, col)

    Returns:
        - map of peak values
        - map of peak variances
        - map of chi2/ndf
        - map of charge sharing parameter values
    """
    _funcs = dict(
        CHARGE_SHARING=fit_charge_sharing,
@@ -416,7 +421,6 @@ def fit_histogram(x, _fit_func, n_sigma, rebin, ratio, noise, _inp):
    )
    fit_func = _funcs[_fit_func]

    n_cells, n_rows, n_cols = histo.shape[1:]
    sigma0 = 15.
cal_tools/jungfrau/jungfraualgs.pyx
import numpy as np
cimport numpy as np
cimport cython

@cython.boundscheck(False)
@cython.wraparound(False)
def fill_histogram(np.ndarray[np.float32_t, ndim=1] h_bins, np.ndarray[np.float32_t, ndim=4] imgs):
    cdef int n_trains = imgs.shape[0]
    cdef int n_cells = imgs.shape[1]
    cdef int n_rows = imgs.shape[2]
    cdef int n_cols = imgs.shape[3]
    cdef int n_bins = len(h_bins) - 1
    cdef np.ndarray[np.int32_t, ndim=4] h_out = np.zeros((n_bins, n_cells, n_rows, n_cols), dtype=np.int32)
    cdef int train, cell, row, col, bin
    cdef double value
    # Linear scan over bins for every value: O(n_bins) per sample, unlike
    # np.histogram's fast path for uniform bins. Values outside the bin
    # range, including those on the rightmost edge, are silently dropped.
    for cell in range(n_cells):
        for row in range(n_rows):
            for col in range(n_cols):
                for train in range(n_trains):
                    value = imgs[train, cell, row, col]
                    for bin in range(n_bins):
                        if h_bins[bin] <= value < h_bins[bin + 1]:
                            h_out[bin, cell, row, col] += 1
                            break
    return h_out, h_bins
\ No newline at end of file
@@ -5,9 +5,10 @@ import pytest
from cal_tools.jungfrau.jungfrau_ff import (
    chunk_multi,
    fill_histogram,
    _peak_position,
)


def test_peak_detection_correctness():
    x = np.array([10, 20, 30, 40, 50, 60, 70, 80])
@@ -54,7 +55,7 @@ def sample_chunk_16_cells():
def test_fill_histogram_basic(sample_chunk):
    h_bins = np.linspace(0, 1000, 101, dtype=np.float32)  # 100 bins
    hist, edges = fill_histogram(sample_chunk, h_bins)
    assert hist.shape == (100, 1, 256, 64)
    assert edges.shape == (101,)
@@ -64,7 +65,7 @@ def test_fill_histogram_basic(sample_chunk):
def test_fill_histogram_16_cells(sample_chunk_16_cells):
    h_bins = np.linspace(0, 1000, 101, dtype=np.float32)
    hist, _ = fill_histogram(sample_chunk_16_cells, h_bins)
    assert hist.shape == (100, 16, 256, 64)
    assert np.sum(hist) == np.prod(sample_chunk_16_cells.shape)
@@ -73,7 +74,7 @@ def test_fill_histogram_16_cells(sample_chunk_16_cells):
def test_fill_histogram_single_train():
    chunk = np.random.rand(1, 1, 256, 64).astype(np.float32)
    h_bins = np.linspace(0, 100, 11, dtype=np.float32)
    hist, _ = fill_histogram(chunk, h_bins)
    assert hist.shape == (10, 1, 256, 64)
    assert np.sum(hist) == np.prod(chunk.shape)
@@ -82,7 +83,7 @@ def test_fill_histogram_single_train():
def test_fill_histogram_single_bin():
    chunk = np.ones((10, 1, 256, 64), dtype=np.float32)
    h_bins = np.array([0, 2], dtype=np.float32)
    hist, _ = fill_histogram(chunk, h_bins)
    assert hist.shape == (1, 1, 256, 64)
    assert np.all(hist == 10)
@@ -91,7 +92,7 @@ def test_fill_histogram_single_bin():
def test_fill_histogram_float_data():
    chunk = np.random.rand(50, 1, 256, 64).astype(np.float32)
    h_bins = np.linspace(0, 1, 11, dtype=np.float32)
    hist, _ = fill_histogram(chunk, h_bins)
    assert hist.shape == (10, 1, 256, 64)
    assert np.sum(hist) == np.prod(chunk.shape)
@@ -100,7 +101,7 @@ def test_fill_histogram_float_data():
def test_fill_histogram_out_of_range():
    chunk = np.random.rand(100, 1, 256, 64).astype(np.float32)
    h_bins = np.linspace(0, 100, 11, dtype=np.float32)
    hist, _ = fill_histogram(chunk, h_bins)
    assert hist.shape == (10, 1, 256, 64)
    assert np.sum(hist) <= np.prod(chunk.shape)
@@ -114,7 +115,7 @@ def test_fill_histogram_out_of_range():
def test_fill_histogram_various_shapes(shape):
    chunk = np.random.rand(*shape).astype(np.float32)
    h_bins = np.linspace(0, 1000, 101, dtype=np.float32)
    hist, _ = fill_histogram(chunk, h_bins)
    assert hist.shape == (100, *shape[1:])
    assert np.sum(hist) == np.prod(shape)