From d059ae0afd84ba3b72351e3741134ca74835bb99 Mon Sep 17 00:00:00 2001
From: karnem <>
Date: Tue, 17 Mar 2020 12:47:49 +0100
Subject: [PATCH] Refactor lpd notebooks to use karabo_da

 notebooks/LPD/LPDChar_Darks_NBC.ipynb      |  92 +++++++------
 notebooks/LPD/LPD_Correct_and_Verify.ipynb | 153 ++++++++-------------
 2 files changed, 112 insertions(+), 133 deletions(-)

diff --git a/notebooks/LPD/LPDChar_Darks_NBC.ipynb b/notebooks/LPD/LPDChar_Darks_NBC.ipynb
index 0e878db3a..d710ba8af 100644
--- a/notebooks/LPD/LPDChar_Darks_NBC.ipynb
+++ b/notebooks/LPD/LPDChar_Darks_NBC.ipynb
@@ -58,29 +58,36 @@
     "cluster_profile = \"noDB\" # The ipcluster profile to use\n",
     "in_folder = \"/gpfs/exfel/exp/FXE/201931/p900088/raw\" # path to input data, required\n",
     "out_folder = \"/gpfs/exfel/data/scratch/karnem/LPD/\" # path to output to, required\n",
-    "sequences = [0] # sequence files to evaluate\n",
+    "sequence = 0 # sequence file to evaluate\n",
     "modules = [-1] # list of modules to evaluate, RANGE ALLOWED\n",
-    "\n",
-    "capacitor_setting = 5 # capacitor_setting for which data was taken, required\n",
     "run_high = 112 # run number in which high gain data was recorded, required\n",
     "run_med = 113 # run number in which medium gain data was recorded, required\n",
     "run_low = 114 # run number in which low gain data was recorded, required\n",
-    "mem_cells = 512 # number of memory cells used\n",
-    "local_output = True # output constants locally\n",
-    "db_output = False # output constants to database\n",
-    "bias_voltage = 250 # detector bias voltage\n",
+    "karabo_id = \"FXE_DET_LPD1M-1\" # karabo id of the detector\n",
+    "karabo_da = [-1]  # data aggregators\n",
+    "receiver_id = \"{}CH0\" # inset for receiver devices\n",
+    "path_template = 'RAW-R{:04d}-{}-S{:05d}.h5' # the template to use to access data\n",
+    "h5path = '/INSTRUMENT/{}/DET/{}:xtdf/image' # path in the HDF5 file to images\n",
+    "h5path_idx = '/INDEX/{}/DET/{}:xtdf/image' # path in the HDF5 file to the image index\n",
+    "\n",
+    "use_dir_creation_date = True # use the creation date of the directory for database time derivation\n",
     "cal_db_interface = \"tcp://max-exfl016:8015#8025\" # the database interface to use\n",
     "cal_db_timeout = 300000 # timeout on caldb requests\"\n",
+    "local_output = True # output constants locally\n",
+    "db_output = False # output constants to database\n",
+    "capacitor_setting = 5 # capacitor_setting for which data was taken, required\n",
+    "mem_cells = 512 # number of memory cells used\n",
+    "bias_voltage = 250 # detector bias voltage\n",
     "thresholds_offset_sigma = 3. # bad pixel relative threshold in terms of n sigma offset\n",
     "thresholds_offset_hard = [400, 1500] # bad pixel hard threshold\n",
     "thresholds_noise_sigma = 7. # bad pixel relative threshold in terms of n sigma noise\n",
     "thresholds_noise_hard = [1, 35] # bad pixel hard threshold\n",
     "skip_first_ntrains = 10 # Number of first trains to skip\n",
-    "not_use_dir_creation_date = False # do not use the creation date of the directory for database time derivation\n",
+    "\n",
     "instrument = \"FXE\" # instrument name\n",
-    "ntrains = 300 # number of trains to use\n",
+    "ntrains = 100 # number of trains to use\n",
     "high_res_badpix_3d = False # plot bad-pixel summary in high resolution\n",
     "test_for_normality = False # permorm normality test"
@@ -116,7 +123,7 @@
     "from iCalibrationDB import (ConstantMetaData, Constants, \n",
     "                            Conditions, Detectors, \n",
     "                            Versions)\n",
-    "from cal_tools.tools import (gain_map_files, parse_runs, \n",
+    "from cal_tools.tools import (map_gain_stages, parse_runs, \n",
     "                             run_prop_seq_from_path, \n",
     "                             get_notebook_name, \n",
     "                             get_dir_creation_date, get_from_db,\n",
@@ -141,29 +148,32 @@
     "max_cells = mem_cells\n",
     "cells = np.arange(max_cells)\n",
     "gain_names = ['High', 'Medium', 'Low']\n",
-    "\n",
-    "if modules[0] == -1:\n",
-    "    modules = list(range(16))\n",
+    "    \n",
+    "if karabo_da[0] == -1:\n",
+    "    if modules[0] == -1:\n",
+    "        modules = list(range(16))\n",
+    "    karabo_da = ['LPD{:02d}'.format(i) for i in modules]\n",
+    "else:\n",
+    "    modules = [int(x[-2:]) for x in karabo_da]\n",
     "gain_runs = OrderedDict()\n",
     "if capacitor_setting == 5:\n",
-    "    gain_runs[\"high_5pf\"] = \"r{:04d}\".format(run_high)\n",
-    "    gain_runs[\"med_5pf\"] =  \"r{:04d}\".format(run_med)\n",
-    "    gain_runs[\"low_5pf\"] =  \"r{:04d}\".format(run_low)\n",
+    "    gain_runs[\"high_5pf\"] = run_high\n",
+    "    gain_runs[\"med_5pf\"] =  run_med\n",
+    "    gain_runs[\"low_5pf\"] =  run_low\n",
     "elif capacitor_setting == 50:\n",
-    "    gain_runs[\"high_50pf\"] = \"r{:04d}\".format(run_high)\n",
-    "    gain_runs[\"med_50pf\"] =  \"r{:04d}\".format(run_med)\n",
-    "    gain_runs[\"low_50pf\"] =  \"r{:04d}\".format(run_low)\n",
+    "    gain_runs[\"high_50pf\"] = run_high\n",
+    "    gain_runs[\"med_50pf\"] =  run_med\n",
+    "    gain_runs[\"low_50pf\"] =  run_low\n",
     "capacitor_settings = [capacitor_setting]\n",
     "capacitor_settings = ['{}pf'.format(c) for c in capacitor_settings]\n",
-    "QUADRANTS = 4\n",
-    "MODULES_PER_QUAD = 4\n",
-    "DET_FILE_INSET = \"LPD\"\n",
+    "h5path = h5path.format(karabo_id, receiver_id)\n",
+    "h5path_idx = h5path_idx.format(karabo_id, receiver_id)\n",
     "creation_time = None\n",
-    "if not not_use_dir_creation_date:\n",
+    "if use_dir_creation_date:\n",
     "    creation_time = get_dir_creation_date(in_folder, run_high)\n",
     "    print(\"Using {} as creation time\".format(creation_time))\n",
@@ -178,7 +188,7 @@
     "print(\"Proposal: {}\".format(prop))\n",
     "print(\"Memory cells: {}/{}\".format(mem_cells, max_cells))\n",
     "print(\"Runs: {}, {}, {}\".format(run_high, run_med, run_low))\n",
-    "print(\"Sequences: {}\".format(sequences))\n",
+    "print(\"Sequence: {}\".format(sequence))\n",
     "print(\"Using DB: {}\".format(db_output))\n",
     "print(\"Input: {}\".format(in_folder))\n",
     "print(\"Output: {}\".format(out_folder))\n",
@@ -192,10 +202,9 @@
    "outputs": [],
    "source": [
     "# set everything up filewise\n",
-    "os.makedirs(out_folder, exist_ok=True)\n",
-    "\n",
-    "gmf = gain_map_files(in_folder, gain_runs, sequences, DET_FILE_INSET, QUADRANTS, MODULES_PER_QUAD)\n",
-    "gain_mapped_files, total_sequences, total_file_size = gmf"
+    "gmf = map_gain_stages(in_folder, gain_runs, path_template, karabo_da, [sequence])\n",
+    "gain_mapped_files, total_sequences, total_file_size = gmf\n",
+    "print(f\"Will process a total of {total_sequences} sequences.\")"
@@ -211,8 +220,9 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "# the actual characterization - to not eded this without consultation\n",
-    "def characterize_module(cells, bp_thresh, skip_first_ntrains, ntrains, test_for_normality, inp):\n",
+    "# the actual characterization\n",
+    "def characterize_module(cells, bp_thresh, skip_first_ntrains, ntrains, test_for_normality, \n",
+    "                        h5path, h5path_idx, inp):\n",
     "    import numpy as np\n",
     "    import copy\n",
     "    import h5py\n",
@@ -233,18 +243,18 @@
     "    infile = h5py.File(filename, \"r\", driver=\"core\")\n",
     "    \n",
-    "    bpath = \"/INDEX/FXE_DET_LPD1M-1/DET/{}CH0:xtdf/image\".format(channel)\n",
-    "    count = infile[f\"{bpath}/count\"][()]\n",
-    "    first = infile[f\"{bpath}/first\"][()]\n",
+    "    h5path = h5path.format(channel)\n",
+    "    h5path_idx = h5path_idx.format(channel)\n",
+    "    print(h5path_idx)\n",
+    "    count = infile[f\"{h5path_idx}/count\"][()]\n",
+    "    first = infile[f\"{h5path_idx}/first\"][()]\n",
     "    valid = count != 0\n",
     "    count, first = count[valid], first[valid]\n",
     "    first_image = int(first[skip_first_ntrains] if first.shape[0] > skip_first_ntrains else 0)\n",
     "    last_image = int(first_image + np.sum(count[skip_first_ntrains:skip_first_ntrains+ntrains]))\n",
-    "    im = np.array(infile[\"/INSTRUMENT/FXE_DET_LPD1M-1/DET/{}CH0:xtdf/image/data\".format(\n",
-    "        channel)][first_image:last_image, ...])\n",
-    "    cellid = np.squeeze(np.array(infile[\"/INSTRUMENT/FXE_DET_LPD1M-1/DET/{}CH0:xtdf/image/cellId\".format(\n",
-    "        channel)][first_image:last_image, ...]))\n",
+    "    im = np.array(infile[\"{}/data\".format(h5path, channel)][first_image:last_image, ...])\n",
+    "    cellid = np.squeeze(np.array(infile[\"{}/cellId\".format(h5path, channel)][first_image:last_image, ...]))\n",
     "    infile.close()\n",
     "    im, g = splitOffGainLPD(im[:, 0, ...])\n",
@@ -323,11 +333,11 @@
     "    gg+=1\n",
-    "\n",
     "p = partial(characterize_module, max_cells,\n",
     "                (thresholds_offset_hard, thresholds_offset_sigma,\n",
     "                 thresholds_noise_hard, thresholds_noise_sigma),\n",
-    "                skip_first_ntrains, ntrains, test_for_normality)\n",
+    "                skip_first_ntrains, ntrains, test_for_normality,\n",
+    "            h5path, h5path_idx)\n",
     "results = view.map_sync(p, inp)\n",
     "for ir, r in enumerate(results):\n",
@@ -1185,7 +1195,7 @@
     "                else:\n",
     "                    line += ['-']\n",
     "                    \n",
-    "            table.append(line)\n",
+    "                table.append(line)\n",
     "display(Markdown('### Number of bad pixels ###'.format(qm)))\n",
     "if len(table)>0:\n",
@@ -1252,7 +1262,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.6.7"
+   "version": "3.7.6"
  "nbformat": 4,
diff --git a/notebooks/LPD/LPD_Correct_and_Verify.ipynb b/notebooks/LPD/LPD_Correct_and_Verify.ipynb
index a61ed7762..0c731274e 100644
--- a/notebooks/LPD/LPD_Correct_and_Verify.ipynb
+++ b/notebooks/LPD/LPD_Correct_and_Verify.ipynb
@@ -20,58 +20,51 @@
    "outputs": [],
    "source": [
+    "cluster_profile = \"noDB\" # cluster profile to use\n",
     "in_folder = \"/gpfs/exfel/exp/FXE/201931/p900088/raw/\" # the folder to read data from, required\n",
+    "out_folder = \"/gpfs/exfel/data/scratch/karnem/test_1/lpd_correct_006\" # the folder to output to, required\n",
+    "sequences = [-1] # sequences to correct, set to -1 for all, range allowed\n",
+    "modules = [-1] # modules to correct, set to -1 for all, range allowed\n",
     "run = 270 # runs to process, required\n",
-    "out_folder = \"/gpfs/exfel/data/scratch/<username>/outputpath\" # the folder to output to, required\n",
+    "\n",
+    "karabo_id = \"FXE_DET_LPD1M-1\" # karabo id of the detector\n",
+    "karabo_da = [-1]  # data aggregators\n",
+    "receiver_id = \"{}CH0\" # inset for receiver devices\n",
+    "path_template = 'RAW-R{:04d}-{}-S{:05d}.h5' # the template to use to access data\n",
+    "h5path = '/INSTRUMENT/{}/DET/{}:xtdf/image' # path in the HDF5 file to images\n",
+    "h5path_idx = '/INDEX/{}/DET/{}:xtdf/image' # path in the HDF5 file to the image index\n",
+    "\n",
+    "use_dir_creation_date = True # use the creation date of the directory for database time derivation\n",
+    "cal_db_interface = \"tcp://max-exfl016:8015#8020\" # the database interface to use\n",
+    "cal_db_timeout = 30000 # timeout for calibration db requests in milliseconds\n",
+    "\n",
+    "\n",
     "calfile =  \"/gpfs/exfel/data/scratch/xcal/lpd_store_0519.h5\" # path to constants extracted from the db into a file\n",
-    "sequences = [-1] # sequences to correct, set to -1 for all, range allowed\n",
     "mem_cells = 512 # memory cells in data\n",
     "overwrite = True # set to True if existing data should be overwritten\n",
     "no_relative_gain = False # do not do relative gain correction\n",
     "no_flat_fields = False # do not do flat field correction\n",
-    "cluster_profile = \"noDB\" # cluster profile to use\n",
     "max_pulses = 512 # maximum number of pulses per train\n",
-    "use_now_as_creation_date = False # do not use dir creation data, but now\n",
     "no_non_linear_corrections = False # do not apply non-linear corrections\n",
-    "\n",
     "max_cells_db = 512 # maximum cells for data from the database\n",
     "rawversion = 2 # raw format version\n",
-    "instrument = \"FXE\" # the instrument\n",
     "capacitor = '5pF' # capacitor setting: 5pF or 50pF\n",
     "photon_energy = 9.2 # the photon energy in keV\n",
-    "\n",
     "nodb = False # set to true if db input is to be avoided\n",
     "bias_voltage = 250 # detector bias voltage\n",
-    "cal_db_interface = \"tcp://max-exfl016:8015#8020\" # the database interface to use\n",
     "geometry_file = \"/gpfs/exfel/d/cal/exchange/lpdMF_00.h5\" # the geometry file to use, MAR 2018\n",
     "beam_center_offset =  [1.5, 1] # offset from the beam center, MAR 2018\n",
     "sequences_per_node = 1 # sequence files to process per node\n",
-    "cal_db_timeout = 30000 # timeout for calibration db requests in milliseconds\n",
+    "\n",
     "dont_mark_non_lin_region = False # do not mark non-linear regions in BP map\n",
     "linear_between_high_gain = [-5000, 2500]  # region in which high gain is considered linear, in ADU\n",
     "linear_between_med_gain = [300, 3000]  # region in which medium gain is considered linear, in ADU\n",
     "linear_between_low_gain = [300, 3000]  # region in which low gain is considered linear, in ADU\n",
     "nlc_version = 2 # version of NLC to use\n",
-    "def balance_sequences(in_folder, run, sequences, sequences_per_node):\n",
-    "    import glob\n",
-    "    import re\n",
-    "    import numpy as np\n",
-    "    if sequences[0] == -1:\n",
-    "        sequence_files = glob.glob(\"{}/r{:04d}/*-S*.h5\".format(in_folder, run))\n",
-    "        seq_nums = set()\n",
-    "        for sf in sequence_files:\n",
-    "            seqnum = re.findall(r\".*-S([0-9]*).h5\", sf)[0]\n",
-    "            seq_nums.add(int(seqnum))\n",
-    "        seq_nums -= set(sequences)\n",
-    "        nsplits = len(seq_nums)//sequences_per_node+1\n",
-    "        while nsplits > 16:\n",
-    "            sequences_per_node += 1\n",
-    "            nsplits = len(seq_nums)//sequences_per_node+1\n",
-    "            print(\"Changed to {} sequences per node to have a maximum of 8 concurrent jobs\".format(sequences_per_node))\n",
-    "        return [l.tolist() for l in np.array_split(list(seq_nums), nsplits) if len(l)]\n",
-    "    else:\n",
-    "        return sequences"
+    "def balance_sequences(in_folder, run, sequences, sequences_per_node, karabo_da):\n",
+    "    from xfel_calibrate.calibrate import balance_sequences as bs\n",
+    "    return bs(in_folder, run, sequences, sequences_per_node, karabo_da)\n"
@@ -94,6 +87,15 @@
     "if sequences[0] == -1:\n",
     "    sequences = None\n",
+    "\n",
+    "if karabo_da[0] == -1:\n",
+    "    if modules[0] == -1:\n",
+    "        modules = list(range(16))\n",
+    "    karabo_da = ['LPD{:02d}'.format(i) for i in modules]\n",
+    "else:\n",
+    "    modules = [int(x[-2:]) for x in karabo_da]\n",
+    "print(\"Process modules: \", \n",
+    "      ', '.join([f\"Q{x // 4 + 1}M{x % 4 + 1}\" for x in modules]))\n",
     "    \n",
     "do_rel_gain = not no_relative_gain\n",
     "do_ff = not no_flat_fields\n",
@@ -113,7 +115,9 @@
     "from ipyparallel import Client\n",
     "from iCalibrationDB import ConstantMetaData, Constants, Conditions, Detectors, Versions\n",
-    "from cal_tools.tools import gain_map_files, parse_runs, run_prop_seq_from_path, get_notebook_name, get_dir_creation_date, get_constant_from_db\n",
+    "from cal_tools.tools import (gain_map_files, parse_runs, run_prop_seq_from_path, \n",
+    "                             get_notebook_name, get_dir_creation_date, \n",
+    "                             get_constant_from_db, map_modules_from_folder)\n",
     "from cal_tools.influx import InfluxLogger\n",
     "from cal_tools.enums import BadPixels\n",
     "from cal_tools.plotting import show_overview, plot_badpix_3d, create_constant_overview\n",
@@ -130,11 +134,6 @@
     "gains = np.arange(3)\n",
     "cells = np.arange(max_cells)\n",
-    "use_dir_creation_date = not use_now_as_creation_date\n",
-    "\n",
-    "QUADRANTS = 4\n",
-    "MODULES_PER_QUAD = 4\n",
-    "DET_FILE_INSET = \"LPD\"\n",
     "CHUNK_SIZE = 512\n",
     "MAX_PAR = 32\n",
@@ -151,16 +150,18 @@
     "    \n",
     "print(\"Using {} as creation time\".format(creation_time.isoformat()))\n",
-    "in_folder = \"{}/r{:04d}\".format(in_folder, run)\n",
-    "\n",
-    "run, proposal, seq = run_prop_seq_from_path(in_folder)\n",
+    "_, proposal, seq = run_prop_seq_from_path(in_folder)\n",
+    "instrument = karabo_id.split(\"_\")[0]\n",
     "logger = InfluxLogger(detector=\"LPD\", instrument=instrument, mem_cells=mem_cells,\n",
     "                      notebook=get_notebook_name(), proposal=proposal)\n",
     "client = InfluxDBClient('exflqr18318', 8086, 'root', 'root', 'calstats')\n",
     "mark_non_lin_region = not dont_mark_non_lin_region\n",
-    "linear_between = [linear_between_high_gain, linear_between_med_gain, linear_between_low_gain]"
+    "linear_between = [linear_between_high_gain, linear_between_med_gain, linear_between_low_gain]\n",
+    "\n",
+    "h5path = h5path.format(karabo_id, receiver_id)\n",
+    "h5path_idx = h5path_idx.format(karabo_id, receiver_id)"
@@ -175,50 +176,8 @@
    "outputs": [],
    "source": [
     "# set everything up filewise\n",
-    "from queue import Queue\n",
-    "from collections import OrderedDict\n",
-    "#if not os.path.exists(out_folder):\n",
-    "#    os.makedirs(out_folder)\n",
-    "#elif not overwrite:\n",
-    "#    raise AttributeError(\"Output path exists! Exiting\")\n",
-    " \n",
-    "def map_modules_from_files(filelist):\n",
-    "    module_files = OrderedDict()\n",
-    "    mod_ids = OrderedDict()\n",
-    "    total_sequences = 0\n",
-    "    sequences_qm = {}\n",
-    "    for quadrant in range(0, QUADRANTS):\n",
-    "        for module in range(0, MODULES_PER_QUAD):\n",
-    "            name = \"Q{}M{}\".format(quadrant + 1, module + 1)\n",
-    "            module_files[name] = Queue()\n",
-    "            num = quadrant * 4 + module\n",
-    "            mod_ids[name] = num\n",
-    "            file_infix = \"{}{:02d}\".format(DET_FILE_INSET, num)\n",
-    "            sequences_qm[name] = 0\n",
-    "            for file in filelist:\n",
-    "                if file_infix in file:\n",
-    "                    module_files[name].put(file)\n",
-    "                    total_sequences += 1\n",
-    "                    sequences_qm[name] += 1\n",
-    "                \n",
-    "    return module_files, mod_ids, total_sequences, sequences_qm\n",
-    "\n",
-    "dirlist = sorted(os.listdir(in_folder))\n",
-    "file_list = []\n",
-    "\n",
-    "for entry in dirlist:\n",
-    "    #only h5 file\n",
-    "    abs_entry = \"{}/{}\".format(in_folder, entry)\n",
-    "    if os.path.isfile(abs_entry) and os.path.splitext(abs_entry)[1] == \".h5\":\n",
-    "        \n",
-    "        if sequences is None:\n",
-    "            file_list.append(abs_entry)\n",
-    "        else:\n",
-    "            for seq in sequences:\n",
-    "                if \"{:05d}.h5\".format(seq) in abs_entry:\n",
-    "                    file_list.append(os.path.abspath(abs_entry))\n",
-    "                    \n",
-    "mapped_files, mod_ids, total_sequences, sequences_qm = map_modules_from_files(file_list)\n",
+    "mmf = map_modules_from_folder(in_folder, run, path_template, karabo_da, sequences)\n",
+    "mapped_files, mod_ids, total_sequences, sequences_qm, _ = mmf\n",
     "MAX_PAR = min(MAX_PAR, total_sequences)"
@@ -258,7 +217,8 @@
     "        ti += 1\n",
     "md = display(Latex(tabulate.tabulate(table, tablefmt='latex', headers=[\"#\", \"module\", \"# module\", \"file\"])))      \n",
     "# restore the queue\n",
-    "mapped_files, mod_ids, total_sequences, sequences_qm = map_modules_from_files(file_list)"
+    "mmf = map_modules_from_folder(in_folder, run, path_template, karabo_da, sequences)\n",
+    "mapped_files, mod_ids, total_sequences, sequences_qm, _ = mmf"
@@ -277,7 +237,7 @@
     "def correct_module(max_cells, do_ff, index_v, CHUNK_SIZE, total_sequences, sequences_qm, \n",
     "                   bins_gain_vs_signal, bins_signal_low_range, bins_signal_high_range, max_pulses,\n",
     "                   dbparms, fileparms, nodb, no_non_linear_corrections, mark_non_lin_region, linear_between,\n",
-    "                   nlc_version, inp):\n",
+    "                   nlc_version, h5path, h5path_idx, inp):\n",
     "    import numpy as np\n",
     "    import copy\n",
     "    import h5py\n",
@@ -335,13 +295,21 @@
     "        infile = h5py.File(filename, \"r\", driver=\"core\")\n",
     "        outfile = h5py.File(filename_out, \"w\")\n",
+    "        \n",
+    "        # LPD correction requires path without the leading \"/\"\n",
+    "        if h5path[0] == '/':\n",
+    "            h5path = h5path[1:]\n",
+    "        if h5path_idx[0] == '/':\n",
+    "            h5path_idx = h5path_idx[1:]\n",
+    "            \n",
     "        try:\n",
     "            lpd_corr = LpdCorrections(infile, outfile, max_cells, channel, max_pulses,\n",
     "                                      bins_gain_vs_signal, bins_signal_low_range,\n",
     "                                      bins_signal_high_range, do_ff=do_ff, raw_fmt_version=index_v,\n",
     "                                      correct_non_linear=(not no_non_linear_corrections),\n",
     "                                      mark_non_lin_region=mark_non_lin_region, linear_between=linear_between,\n",
-    "                                      nlc_version=nlc_version)\n",
+    "                                      nlc_version=nlc_version,\n",
+    "                                      h5_data_path=h5path, h5_index_path=h5path_idx)\n",
     "        \n",
     "            try:\n",
@@ -424,7 +392,8 @@
     "        print(\"Running {} tasks parallel\".format(len(inp)))\n",
     "        p = partial(correct_module, max_cells, do_ff, index_v, CHUNK_SIZE, total_sequences, sequences_qm,\n",
     "                   bins_gain_vs_signal, bins_signal_low_range, bins_signal_high_range, max_pulses, dbparms,\n",
-    "                   fileparms, nodb, no_non_linear_corrections, mark_non_lin_region, linear_between, nlc_version)\n",
+    "                   fileparms, nodb, no_non_linear_corrections, mark_non_lin_region, linear_between, nlc_version,\n",
+    "                   h5path, h5path_idx)\n",
     "        \n",
     "        r = view.map_sync(p, inp)\n",
     "        #r = list(map(p, inp))\n",
@@ -600,7 +569,7 @@
     "import cal_tools.metrology as metro\n",
     "in_files = \"{}/CORR*LPD*S{:05d}*.h5\".format(out_folder, sequences[0] if sequences else 0)\n",
-    "datapath = \"INSTRUMENT/FXE_DET_LPD1M-1/DET/{}CH0:xtdf/image/data\"\n",
+    "datapath = \"{}/data\".format(h5path)\n",
     "print(\"Preview is from {}\".format(in_files))"
@@ -616,8 +585,8 @@
    "outputs": [],
    "source": [
     "posarr = metro.positionFileList(in_files, datapath, geometry_file, d_quads, nImages = 10)\n",
-    "datapath = \"INSTRUMENT/FXE_DET_LPD1M-1/DET/{}CH0:xtdf/image/mask\"\n",
-    "maskedarr = metro.positionFileList(in_files, datapath, geometry_file, d_quads, nImages = 10)"
+    "maskpath = \"{}/mask\".format(h5path)\n",
+    "maskedarr = metro.positionFileList(in_files, maskpath, geometry_file, d_quads, nImages = 10)"
@@ -820,8 +789,8 @@
    "outputs": [],
    "source": [
-    "datapath = \"INSTRUMENT/FXE_DET_LPD1M-1/DET/{}CH0:xtdf/image/gain\"\n",
-    "posarr = metro.positionFileList(in_files, datapath, geometry_file, d_quads, nImages = 100)"
+    "gainpath = \"{}/gain\".format(h5path)\n",
+    "posarr = metro.positionFileList(in_files, gainpath, geometry_file, d_quads, nImages = 100)"
@@ -874,7 +843,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.6.7"
+   "version": "3.7.6"
  "nbformat": 4,