diff --git a/notebooks/Timepix/Compute_Timepix_Event_Centroids.ipynb b/notebooks/Timepix/Compute_Timepix_Event_Centroids.ipynb
new file mode 100755
index 0000000000000000000000000000000000000000..cce6da8569c7290c4989bd2963ab6d4725a0ef0b
--- /dev/null
+++ b/notebooks/Timepix/Compute_Timepix_Event_Centroids.ipynb
@@ -0,0 +1,448 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "4d04f210-fc4a-4b1d-9dfe-9c85b94b884f",
+   "metadata": {},
+   "source": [
+    "## Imports"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "524fe654-e112-4abe-813c-a0be9b3a3034",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from time import time\n",
+    "\n",
+    "import numpy as np\n",
+    "\n",
+    "from extra_data import open_run, RunDirectory"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "392aa413-0035-4861-9f1f-03abb51d0613",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%matplotlib notebook\n",
+    "# p900256  (stray token kept as comment: a bare name here would raise NameError)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "3911b4e1-11af-4a51-be59-103ca9f6f261",
+   "metadata": {},
+   "source": [
+    "## Centroiding functions"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "e36f997c-4b66-4b11-99a8-5887e3572f56",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# centroiding\n",
+    "\n",
+    "import numpy as np\n",
+    "import scipy.ndimage as nd\n",
+    "from sklearn.cluster import DBSCAN\n",
+    "from warnings import warn\n",
+    "import traceback as tb\n",
+    "\n",
+    "error_msgs = {\n",
+    "    -1: \"tpx_data has an invalid structure - ignore provided data\",\n",
+    "    -2: \"tpx_data arrays are of invalid lengths - ignore provided data\",\n",
+    "    -3: \"tpx_data arrays are empty\"\n",
+    "    }\n",
+    "\n",
+    "def check_data(tpx_data):\n",
+    "    \"\"\"Validate that tpx_data has the required keys and equally sized, non-empty arrays; return 0 or a negative error code.\"\"\"\n",
+    "    required_keys = [\"x\", \"y\", \"tof\", \"tot\"]\n",
+    "    for key in required_keys:\n",
+    "        if key not in tpx_data:\n",
+    "            warn(\"tpx data must contain the keys %s, but key %s not in tpx data keys (%s)\" % (required_keys, key, list(tpx_data.keys())),\n",
+    "                 category=UserWarning)\n",
+    "            return -1\n",
+    "\n",
+    "    reference_n_samples_key = \"x\"\n",
+    "    n_samples = len(tpx_data[reference_n_samples_key])\n",
+    "    for key in tpx_data.keys():\n",
+    "        if n_samples != len(tpx_data[key]):\n",
+    "            warn(\"arrays in tpx data must be of same length ( len(tpx_data[%s])=%i!=%i=(len(tpx_data[%s]) )\" % (reference_n_samples_key, n_samples, len(tpx_data[key]), key),\n",
+    "                 category=UserWarning)\n",
+    "            return -2\n",
+    "    if n_samples == 0:\n",
+    "        warn(\"no samples were provided with tpx data\", category=UserWarning)\n",
+    "        return -3\n",
+    "    return 0\n",
+    "\n",
+    "def apply_single_filter(tpx_data, _filter):\n",
+    "    \"\"\"\n",
+    "    Simple function to apply a selecting or sorting filter to a dictionary of equally sized arrays\n",
+    "    Note: at no point a copy of the dictionary is made, as they are mutable, the input array is changed in memory!\n",
+    "\n",
+    "    Parameters\n",
+    "    ----------\n",
+    "    tpx_data: dictionary with timepix data, all arrays behind each key must be of same length\n",
+    "    _filter: 1d array or list of integers or booleans or np.s_ to select or sort data like a = a[_filter]\n",
+    "\n",
+    "    Returns\n",
+    "    -------\n",
+    "    tpx_data: like input tpx_data but with applied filter\n",
+    "\n",
+    "    \"\"\"\n",
+    "    try:\n",
+    "        for key in tpx_data.keys():\n",
+    "            tpx_data[key] = np.array(tpx_data[key])[_filter]\n",
+    "    except Exception as e:\n",
+    "        # print(tpx_data)\n",
+    "        print(_filter)\n",
+    "        print(_filter.dtype)\n",
+    "        print(_filter.shape)\n",
+    "        print(tpx_data[key].shape)\n",
+    "        raise e\n",
+    "    return tpx_data\n",
+    "\n",
+    "def pre_clustering_filter(tpx_data, tot_threshold=None):\n",
+    "    \"\"\"\n",
+    "    Collection of filters directly applied before clustering.\n",
+    "    Note: at no point a copy of the dictionary is made, as they are mutable, the input array is changed in memory!\n",
+    "\n",
+    "    Parameters\n",
+    "    ----------\n",
+    "    tpx_data: dictionary with timepix data, all arrays behind each key must be of same length\n",
+    "    tot_threshold: minimum time-over-threshold; samples below it are dropped (None disables the filter)\n",
+    "\n",
+    "    Returns\n",
+    "    -------\n",
+    "    tpx_data: like input tpx_data but with applied filters\n",
+    "    \"\"\"\n",
+    "    if tot_threshold is not None:\n",
+    "        tpx_data = apply_single_filter(tpx_data, tpx_data[\"tot\"] >= tot_threshold)\n",
+    "\n",
+    "    return tpx_data\n",
+    "\n",
+    "def post_clustering_filter(tpx_data):\n",
+    "    \"\"\"\n",
+    "    Collection of filters directly applied after clustering.\n",
+    "    Note: at no point a copy of the dictionary is made, as they are mutable, the input array is changed in memory!\n",
+    "\n",
+    "    Parameters\n",
+    "    ----------\n",
+    "    tpx_data: dictionary with timepix data, all arrays behind each key must be of same length, now with key labels\n",
+    "\n",
+    "    Returns\n",
+    "    -------\n",
+    "    tpx_data: like input tpx_data but with applied filters\n",
+    "    \"\"\"\n",
+    "    if tpx_data[\"labels\"] is not None:\n",
+    "        tpx_data = apply_single_filter(tpx_data, tpx_data[\"labels\"] != 0)\n",
+    "\n",
+    "    return tpx_data\n",
+    "\n",
+    "def clustering(tpx_data, epsilon=2, tof_scale=1e7, min_samples=3, n_jobs=1):\n",
+    "    \"\"\"\n",
+    "    Identify hit clusters in (x, y, tof*tof_scale) space with DBSCAN.\n",
+    "    Parameters\n",
+    "    ----------\n",
+    "    tpx_data: dictionary with x, y, tof, tot arrays of equal length\n",
+    "    epsilon: DBSCAN neighbourhood radius in the scaled coordinate space\n",
+    "    tof_scale: factor applied to tof so it is comparable to pixel coordinates\n",
+    "    min_samples: minimum number of samples required to form a cluster\n",
+    "    n_jobs: number of parallel jobs used by DBSCAN\n",
+    "\n",
+    "    Returns\n",
+    "    -------\n",
+    "    labels: integer cluster id per sample (DBSCAN labels shifted by +1 so noise is 0)\n",
+    "    \"\"\"\n",
+    "    coords = np.column_stack((tpx_data[\"x\"], tpx_data[\"y\"], tpx_data[\"tof\"]*tof_scale))\n",
+    "    dist = DBSCAN(eps=epsilon, min_samples=min_samples, metric=\"euclidean\", n_jobs=n_jobs).fit(coords)\n",
+    "    return dist.labels_ + 1\n",
+    "\n",
+    "def empty_centroid_data():\n",
+    "    \"\"\"Return a centroid dictionary with every key produced by get_centroids, all holding empty arrays.\"\"\"\n",
+    "    return {\n",
+    "        \"x\": np.array([]),\n",
+    "        \"y\": np.array([]),\n",
+    "        \"tof\": np.array([]),\n",
+    "        \"tot\": np.array([]),\n",
+    "        \"tot_avg\": np.array([]),\n",
+    "        \"tot_max\": np.array([]),\n",
+    "        \"size\": np.array([]),\n",
+    "    }\n",
+    "\n",
+    "def get_centroids(tpx_data, timewalk_lut=None):\n",
+    "    \"\"\"Reduce each labeled cluster to one centroid: tot-weighted x/y/tof plus intensity and size information.\"\"\"\n",
+    "    centroid_data = empty_centroid_data()\n",
+    "    cluster_labels, cluster_size = np.unique(tpx_data[\"labels\"], return_counts=True)\n",
+    "\n",
+    "    cluster_tot_peaks = np.array(nd.maximum_position(tpx_data[\"tot\"], labels=tpx_data[\"labels\"], index=cluster_labels)).ravel()\n",
+    "    cluster_tot_integrals = nd.sum(tpx_data[\"tot\"], labels=tpx_data[\"labels\"], index=cluster_labels)\n",
+    "\n",
+    "    # compute centroid center through weighted average\n",
+    "    centroid_data[\"x\"] = np.array(nd.sum(tpx_data[\"x\"] * tpx_data[\"tot\"], labels=tpx_data[\"labels\"], index=cluster_labels) / cluster_tot_integrals).ravel()\n",
+    "    centroid_data[\"y\"] = np.array(nd.sum(tpx_data[\"y\"] * tpx_data[\"tot\"], labels=tpx_data[\"labels\"], index=cluster_labels) / cluster_tot_integrals).ravel()\n",
+    "    centroid_data[\"tof\"] = np.array(nd.sum(tpx_data[\"tof\"] * tpx_data[\"tot\"], labels=tpx_data[\"labels\"], index=cluster_labels) / cluster_tot_integrals).ravel()\n",
+    "\n",
+    "    # intensity & size information\n",
+    "    centroid_data[\"tot_avg\"] = np.array(nd.mean(tpx_data[\"tot\"], labels=tpx_data[\"labels\"], index=cluster_labels))\n",
+    "    centroid_data[\"tot_max\"] = tpx_data[\"tot\"][cluster_tot_peaks]\n",
+    "    centroid_data[\"tot\"] = np.array(cluster_tot_integrals)\n",
+    "    centroid_data[\"size\"] = cluster_size\n",
+    "\n",
+    "    # train ID information\n",
+    "    # ~ centroid_data[\"tid\"] = tpx_data[\"tid\"][cluster_tot_peaks]\n",
+    "\n",
+    "    # correct for timewalk if provided\n",
+    "    if timewalk_lut is not None:\n",
+    "        centroid_data[\"tof\"] -= timewalk_lut[np.int_(centroid_data[\"tot_max\"] // 25) - 1] * 1e3\n",
+    "    return centroid_data\n",
+    "\n",
+    "def compute_centroids(x, y, tof, tot,\n",
+    "                      threshold_tot=None,\n",
+    "                      clustering_epsilon=2,\n",
+    "                      clustering_tof_scale=1e7,\n",
+    "                      clustering_min_samples=3,\n",
+    "                      clustering_n_jobs=1,\n",
+    "                      centroiding_timewalk_lut=None):\n",
+    "    \"\"\"Full pipeline: validate, filter, cluster and centroid raw timepix hits; returns a centroid dict or None on invalid input.\"\"\"\n",
+    "    # format input data\n",
+    "    _tpx_data = {\n",
+    "        \"x\": x,\n",
+    "        \"y\": y,\n",
+    "        \"tof\": tof,\n",
+    "        \"tot\": tot\n",
+    "    }\n",
+    "\n",
+    "    # ensure that valid data is available\n",
+    "    data_validation = check_data(_tpx_data)\n",
+    "    if data_validation < 0:\n",
+    "        if data_validation in error_msgs:\n",
+    "            print(\"Data validation failed with message: %s\" % error_msgs[data_validation])\n",
+    "        else:\n",
+    "            print(\"Data validation failed: unknown reason\")\n",
+    "        return None\n",
+    "\n",
+    "    # clustering (identify clusters in 2d data (x,y,tof) that belong to a single hit,\n",
+    "    # each sample belonging to a cluster is labeled with an integer cluster id no)\n",
+    "    _tpx_data = pre_clustering_filter(_tpx_data, tot_threshold=threshold_tot)\n",
+    "    _tpx_data[\"labels\"] = clustering(_tpx_data, epsilon=clustering_epsilon, tof_scale=clustering_tof_scale, min_samples=clustering_min_samples, n_jobs=clustering_n_jobs)\n",
+    "    _tpx_data = post_clustering_filter(_tpx_data)\n",
+    "    # compute centroid data (reduce cluster of samples to a single point with properties)\n",
+    "    if _tpx_data[\"labels\"] is None or _tpx_data[\"labels\"].size == 0:\n",
+    "        # handle case of no identified clusters, return empty dictionary with expected keys\n",
+    "        return empty_centroid_data()\n",
+    "    _centroids = get_centroids(_tpx_data, timewalk_lut=centroiding_timewalk_lut)\n",
+    "    return _centroids"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "4f54e558-d862-48af-86b9-7d9e3d53fd3d",
+   "metadata": {},
+   "source": [
+    "## Output Function (for illustration) - instead data should be written to output file train-id wise"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "83acf826-e351-4a39-a870-6c1e0298e711",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def output(train_id, centroids, fixed_len=None):\n",
+    "    \"\"\"\n",
+    "    In this version the resulting vectors within a run vary in length,\n",
+    "    if fixed_len is provided fixed length vectors are generated; returns the assembled dict\n",
+    "    \"\"\"\n",
+    "    if fixed_len is None:\n",
+    "        out = {\n",
+    "            \"%s[data.trainId]\"%event_output_key : train_id,\n",
+    "            \"%s[data.x]\"%event_output_key : centroids[\"x\"],\n",
+    "            \"%s[data.y]\"%event_output_key : centroids[\"y\"],\n",
+    "            \"%s[data.toa]\"%event_output_key : centroids[\"tof\"],\n",
+    "            \"%s[data.tot_avg]\"%event_output_key : centroids[\"tot_avg\"],\n",
+    "            \"%s[data.tot_max]\"%event_output_key : centroids[\"tot_max\"],\n",
+    "            \"%s[data.size]\"%event_output_key : centroids[\"size\"],\n",
+    "        }\n",
+    "    else:\n",
+    "        if len(centroids[\"x\"])>fixed_len:\n",
+    "            sel = np.s_[:fixed_len]\n",
+    "            out = {\n",
+    "                \"%s[data.trainId]\"%event_output_key : train_id,\n",
+    "                \"%s[data.x]\"%event_output_key : centroids[\"x\"][sel],\n",
+    "                \"%s[data.y]\"%event_output_key : centroids[\"y\"][sel],\n",
+    "                \"%s[data.toa]\"%event_output_key : centroids[\"tof\"][sel],\n",
+    "                \"%s[data.tot_avg]\"%event_output_key : centroids[\"tot_avg\"][sel],\n",
+    "                \"%s[data.tot_max]\"%event_output_key : centroids[\"tot_max\"][sel],\n",
+    "                \"%s[data.centroid_size]\"%event_output_key : centroids[\"size\"][sel],\n",
+    "                \"%s[data.size]\"%event_output_key : len(centroids[\"x\"]),\n",
+    "            }\n",
+    "        else:\n",
+    "            pad = fixed_len - len(centroids[\"x\"])\n",
+    "            out = {\n",
+    "                \"%s[data.trainId]\"%event_output_key : train_id, # int 0d\n",
+    "                \"%s[data.x]\"%event_output_key : np.pad(centroids[\"x\"], (0,pad), constant_values=1), # float 1d\n",
+    "                \"%s[data.y]\"%event_output_key : np.pad(centroids[\"y\"], (0,pad), constant_values=1), # float 1d\n",
+    "                \"%s[data.toa]\"%event_output_key : np.pad(centroids[\"tof\"], (0,pad), constant_values=1), # float 1d\n",
+    "                \"%s[data.tot_avg]\"%event_output_key : np.pad(centroids[\"tot_avg\"], (0,pad), constant_values=1), # float 1d\n",
+    "                \"%s[data.tot_max]\"%event_output_key : np.pad(centroids[\"tot_max\"], (0,pad), constant_values=1), # float 1d\n",
+    "                \"%s[data.centroid_size]\"%event_output_key : np.pad(centroids[\"size\"], (0,pad), constant_values=1), # int 1d\n",
+    "                \"%s[data.size]\"%event_output_key : len(centroids[\"x\"]), # int 0d\n",
+    "            }\n",
+    "    return out"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "a39ce0e4-31b3-40c7-9c80-72b89d037714",
+   "metadata": {},
+   "source": [
+    "## Settings"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "8318bc3f-01be-4144-a4de-ebd736563396",
+   "metadata": {},
+   "source": [
+    "### Data selection (for dev)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "693c72cd-9c67-46d6-a0bc-438126e62642",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Data selection parameters.\n",
+    "run_id = 420\n",
+    "proposal = 900256\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "13956e56-d818-42e4-aa73-7f9c4a897732",
+   "metadata": {},
+   "source": [
+    "### Parameters to be exposed with calibration pipeline"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "a9adf469-32de-43b4-91fa-2508a952d0dc",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Centroiding and output parameters that should be easily adjustable\n",
+    "max_n_centroids = 10000\n",
+    "\n",
+    "clustering_epsilon = 2\n",
+    "clustering_tof_scale = 1e7\n",
+    "clustering_min_samples = 3\n",
+    "clustering_n_jobs = 1\n",
+    "threshold_tot = None\n",
+    "raw_timewalk_lut_filepath = None # fpath to look up table for timewalk correction or None\n",
+    "centroiding_timewalk_lut_filepath = None # fpath to look up table for timewalk correction or None\n",
+    "\n",
+    "# raw_timewalk_lut_filepath = \"timewalk_raw.npy\" # fpath to look up table for timewalk correction or None\n",
+    "# centroiding_timewalk_lut_filepath = \"timewalk_cent.npy\" # fpath to look up table for timewalk correction or None"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "c6dff850-18c8-4d5a-9362-e581cce18d8e",
+   "metadata": {},
+   "source": [
+    "## Centroiding"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "56306c15-513e-4e7f-9c47-c52ca61b27a8",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "run = open_run(run=run_id, proposal=proposal)\n",
+    "event_output_key = 'SQS_AQS_CAM/CAM/TIMEPIX3:daqEventOutput'\n",
+    "\n",
+    "if raw_timewalk_lut_filepath is not None:\n",
+    "    raw_timewalk_lut = np.load(raw_timewalk_lut_filepath)\n",
+    "else:\n",
+    "    raw_timewalk_lut = None\n",
+    "\n",
+    "if centroiding_timewalk_lut_filepath is not None:\n",
+    "    centroiding_timewalk_lut = np.load(centroiding_timewalk_lut_filepath)\n",
+    "else:\n",
+    "    centroiding_timewalk_lut = None"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "8c28a4c8-961c-496b-80da-7fd867e5b0d3",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "t00 = time()\n",
+    "for train_id, data in run.select(event_output_key, \"data.*\").trains():\n",
+    "    t0 = time()\n",
+    "    sel = np.s_[:data[event_output_key][\"data.size\"]]\n",
+    "    \n",
+    "    tot = np.array(data[event_output_key][\"data.tot\"][sel])\n",
+    "    toa = np.array(data[event_output_key][\"data.toa\"][sel])\n",
+    "    \n",
+    "    if raw_timewalk_lut is not None:\n",
+    "        toa -= raw_timewalk_lut[np.int_(tot // 25) - 1] * 1e3\n",
+    "    \n",
+    "    centroids = compute_centroids(data[event_output_key][\"data.x\"][sel], \n",
+    "                                  data[event_output_key][\"data.y\"][sel], \n",
+    "                                  toa, \n",
+    "                                  tot,\n",
+    "                                  threshold_tot=threshold_tot,\n",
+    "                                  clustering_epsilon=clustering_epsilon,\n",
+    "                                  clustering_tof_scale=clustering_tof_scale,\n",
+    "                                  clustering_min_samples=clustering_min_samples,\n",
+    "                                  clustering_n_jobs=clustering_n_jobs,\n",
+    "                                  centroiding_timewalk_lut=centroiding_timewalk_lut)\n",
+    "    \n",
+    "#     print(time()-t0)\n",
+    "    output(train_id, centroids, fixed_len=max_n_centroids)\n",
+    "#     if time()-t00 > 0:\n",
+    "#         break\n",
+    "print(time()-t00)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "pycal",
+   "language": "python",
+   "name": "pycal"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.11"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}