{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Mine XGM Data and LPD Profile #\n",
    "\n",
    "Author: S. Hauf, Version: 0.1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "dc = [1.5, 1] # center offset\n",
    "geometry_file = \"/gpfs/exfel/d/cal/exchange/lpdMF_00.h5\" # the geometry file to use, MAR 2018\n",
    "proposal_folder = \"/gpfs/exfel/exp/FXE/201701/p002045/proc/\" # folder under which runs can be found\n",
    "xgm_folder = \"/gpfs/exfel/exp/FXE/201701/p002045/raw/\" # folder under which runs can be found\n",
    "adc_folder = \"/gpfs/exfel/exp/FXE/201701/p002045/raw/\" # folder under which runs can be found\n",
    "file_prefix = \"CORR-R{:04d}-LPD\" # prefix of each data file - should be a template to replace run number\n",
    "raw_prefix = \"RAW-R{:04d}-LPD\" # prefix of each data file - should be a template to replace run number\n",
    "xgm_prefix = \"RAW-R{:04d}-DA03\" # prefix for XGM file - should be a template to replace run number\n",
    "adc_prefix = \"RAW-R{:04d}-DA01\" # prefix for ADC file - should be a template to replace run number\n",
    "data_path = \"INSTRUMENT/FXE_DET_LPD1M-1/DET/{}CH0:xtdf/image/data\" # path to data in file\n",
    "gain_path = \"INSTRUMENT/FXE_DET_LPD1M-1/DET/{}CH0:xtdf/image/gain\" # path to data in file\n",
    "xgm_path = \"/INSTRUMENT/SA1_XTD2_XGM/DOOCS/MAIN:output/data/intensityTD\" # path to XGM data\n",
    "xgm_path_td = \"/INSTRUMENT/SA1_XTD2_XGM/DOOCS/MAIN:output/data/intensityAUXTD\" # path to XGM AUX data\n",
    "adc_root = \"/INSTRUMENT/FXE_RR_DAQ/ADC/1:network/digitizers\" # root path to ADC data\n",
    "adc_paths = [\"channel_1_A/apd/pulseIntegral\", \"channel_1_B/apd/pulseIntegral\",\n",
    "             \"channel_1_C/apd/pulseIntegral\", \"channel_1_D/apd/pulseIntegral\"]\n",
    "temp_out_folder = \"/gpfs/exfel/data/scratch/haufs/test/tempout6\"\n",
    "cluster_profile = \"noDB\"\n",
    "chunk_size = 512 # read this amount of data at once\n",
    "runs = \"69-72\"\n",
    "method = \"average\" # method to use for evaluation of images: radial, average\n",
    "\n",
    "sequences = [-1] # range allowed"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def create_run_list(proposal_folder, file_prefix, xgm_folder, xgm_prefix, runs):\n",
    "    import glob\n",
    "\n",
    "    from cal_tools.tools import parse_runs\n",
    "    in_runs = parse_runs(runs, return_type=int)\n",
    "    runs_to_process = []\n",
    "    for r in in_runs:\n",
    "        \n",
    "        try:\n",
    "            \n",
    "            tl = glob.glob(\"{}/r{:04d}/{}*.h5\".format(proposal_folder, r, file_prefix.format(r)))\n",
    "            tlxgm = glob.glob(\"{}/r{:04d}/{}*.h5\".format(xgm_folder, r, xgm_prefix.format(r)))\n",
    "            \n",
    "            if len(tl) and len(tlxgm):\n",
    "                runs_to_process.append(r)\n",
    "        except Exception as e:\n",
    "            print(e)\n",
    "            pass\n",
    "        \n",
    "    return runs_to_process"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "import warnings\n",
    "\n",
    "warnings.filterwarnings('ignore')\n",
    "import os\n",
    "\n",
    "import h5py\n",
    "import matplotlib\n",
    "import numpy as np\n",
    "\n",
    "matplotlib.use(\"agg\")\n",
    "import matplotlib.pyplot as plt\n",
    "\n",
    "%matplotlib inline\n",
    "from functools import partial\n",
    "\n",
    "from ipyparallel import Client\n",
    "\n",
    "print(\"Connecting to profile {}\".format(cluster_profile))\n",
    "view = Client(profile=cluster_profile)[:]\n",
    "view.use_dill()\n",
    "\n",
    "d_quads = [(-14+dc[0],-300+dc[1]),(10+dc[0],-9+dc[1]),(-256+dc[0],15+dc[1]),(-280+dc[0],-276+dc[1])] # MAR 2018\n",
    "#runs_to_process = create_run_list(proposal_folder, file_prefix, xgm_folder, xgm_prefix, runs)\n",
    "runs_to_process = runs\n",
    "\n",
    "if method not in [\"radial\", \"average\"]:\n",
    "    print(\"Unknown method, defaulting to average\")\n",
    "    method = \"average\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def get_sequences(in_folder, run, sequences):\n",
    "    import glob\n",
    "    import re\n",
    "\n",
    "    import numpy as np\n",
    "    \n",
    "    sequence_files = glob.glob(\"{}/r{:04d}/*-S*.h5\".format(in_folder, run))\n",
    "    seq_nums = set()\n",
    "    for sf in sequence_files:\n",
    "        seqnum = re.findall(r\".*-S([0-9]*).h5\", sf)[0]\n",
    "        seq_nums.add(int(seqnum))\n",
    "    if len(sequences) and sequences[0] != -1:\n",
    "        seq_nums &= set(sequences)\n",
    "   \n",
    "    return list(seq_nums)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "def handle_sequence(run, proposal_folder, xgm_folder,\n",
    "                    file_prefix, xgm_prefix, data_path, xgm_path, xgm_path_td, raw_prefix, \n",
    "                    geometry_file, d_quads, temp_out_folder, gain_path, \n",
    "                    adc_folder, adc_prefix, adc_root, adc_paths, method, sequence):\n",
    "    import cal_tools.metrology as metro\n",
    "    import h5py\n",
    "    import numpy as np\n",
    "    from scipy.stats import binned_statistic_2d\n",
    "\n",
    "    # convert the Carthesian coordinates of the detector to polar coordinates\n",
    "    def mod_cart_to_pol(d, dx, dy, filter_by_val=True):\n",
    "        \"\"\" Convert carthesian coords to polar coords\n",
    "        \"\"\"\n",
    "        cx, cy = d.shape\n",
    "        x = np.arange(cx)+dx\n",
    "        y = np.arange(cy)+dy\n",
    "        x = np.repeat(x[:,None], cy, axis=1)\n",
    "        y = np.repeat(y[None,:], cx, axis=0)\n",
    "\n",
    "        rho = np.sqrt(x**2 + y**2).flatten()\n",
    "        phi = np.arctan2(y, x).flatten()\n",
    "        flat = d.flatten()\n",
    "\n",
    "        # we also perform a bit of filtering here\n",
    "\n",
    "        if filter_by_val:\n",
    "            good = np.isfinite(flat) & (flat > 0) & (flat < 1e5)\n",
    "\n",
    "            return rho[good], phi[good], flat[good], good\n",
    "\n",
    "        return rho, phi, flat, None\n",
    "    \n",
    "    \n",
    "    \n",
    "    xgm_file = \"{}/r{:04d}/{}-S{:05d}.h5\".format(xgm_folder, run, xgm_prefix.format(run), sequence)\n",
    "    adc_file = \"{}/r{:04d}/{}-S{:05d}.h5\".format(adc_folder, run, adc_prefix.format(run), sequence) if adc_folder != \"none\" else None\n",
    "    data_files = \"{}/r{:04d}/{}*-S{:05d}.h5\".format(proposal_folder, run, file_prefix.format(run), sequence)\n",
    "    raw_files = \"{}/r{:04d}/{}*-S{:05d}.h5\".format(xgm_folder, run, raw_prefix.format(run), sequence)\n",
    "    \n",
    "    try:\n",
    "        with h5py.File(xgm_file, \"r\") as f:\n",
    "\n",
    "            xgm_tid = np.array(f[\"/INDEX/trainId\"])\n",
    "\n",
    "            adc_datas = []\n",
    "            if adc_file:\n",
    "                with h5py.File(adc_file, \"r\") as f2:\n",
    "                    adc_tid = np.array(f2[\"/INDEX/trainId\"])\n",
    "                    intersected = np.intersect1d(xgm_tid, adc_tid)\n",
    "                    adc_idx = np.in1d(adc_tid, intersected)\n",
    "                    xgm_idx = np.in1d(xgm_tid, intersected)\n",
    "                    for p in adc_paths:\n",
    "                        print(\"ADC path: {}\".format(\"{}/{}\".format(adc_root, p)))\n",
    "                        adc_datas.append(f2[\"{}/{}\".format(adc_root, p)][adc_idx, ...])\n",
    "\n",
    "                    xgm_tid = xgm_tid[xgm_idx]\n",
    "                    xgm_d = np.array(f[xgm_path])[xgm_idx]\n",
    "                    xgm_d_td = np.array(f[xgm_path_td])[xgm_idx]\n",
    "            else:\n",
    "\n",
    "                xgm_d = np.array(f[xgm_path])\n",
    "                xgm_d_td = np.array(f[xgm_path_td])\n",
    "\n",
    "            if method == \"radial\":\n",
    "                posarr, intersected, pcounts = metro.positionFileList(data_files, data_path, geometry_file, d_quads, trainIds=xgm_tid)\n",
    "                garr, _, _ = metro.positionFileList(data_files, gain_path, geometry_file, d_quads, trainIds=xgm_tid)\n",
    "                rawarr, _, _ = metro.positionFileList(raw_files, data_path, geometry_file, d_quads, trainIds=xgm_tid, nwa=True, max_counts=16)\n",
    "            elif method == \"average\":\n",
    "                posarr, intersected, pcounts = metro.matchedFileList(data_files, data_path, trainIds=xgm_tid)\n",
    "                garr, _, _ = metro.matchedFileList(data_files, gain_path, trainIds=xgm_tid)\n",
    "                rawarr, _, _ = metro.matchedFileList(raw_files, data_path, trainIds=xgm_tid, nwa=True, max_counts=63)\n",
    "                rawarr = np.moveaxis(rawarr, 3, 4)\n",
    "            print(posarr.shape)\n",
    "            xgm_d = xgm_d[np.in1d(xgm_tid, intersected), ...]\n",
    "            xgm_d_td = xgm_d_td[np.in1d(xgm_tid, intersected), ...]\n",
    "            xgm_f = []\n",
    "            xgm_f_td = []\n",
    "            adc_datas_f = []\n",
    "            for i in range(len(adc_datas)):\n",
    "                adc_datas_f.append([])\n",
    "            for i in range(pcounts.size):\n",
    "                xgm_f += xgm_d[i, :pcounts[i]].flatten().tolist()\n",
    "                xgm_f_td += xgm_d_td[i, :pcounts[i]].flatten().tolist()\n",
    "                for k in range(len(adc_datas)):\n",
    "                    adc_datas_f[k] += adc_datas[k][i, :pcounts[i]].flatten().tolist()\n",
    "            xgm_d = np.array(xgm_f)\n",
    "            xgm_f_td = np.array(xgm_f_td)\n",
    "            for i in range(len(adc_datas)):\n",
    "                adc_datas[i] = np.array(adc_datas_f[i])\n",
    "            xgm_res = []\n",
    "            xgm_res_td = []\n",
    "            max_im = []\n",
    "            sum_im = []\n",
    "            max_gain = []\n",
    "            mean_gain = []\n",
    "            raw_det = []\n",
    "            adc_ret = []\n",
    "            for i in range(len(adc_datas)):\n",
    "                adc_ret.append([])\n",
    "            print(\"Processing {} images, XGM size: {}, ADC size: {}\".format(posarr.shape[0], xgm_d.size, adc_datas[0].size))\n",
    "            print(\"Data shape {}, {}\".format(posarr.shape, rawarr.shape))\n",
    "            for i in range(posarr.shape[0] if method==\"radial\" else posarr.shape[-1]):\n",
    "\n",
    "                xgm = xgm_d[i]\n",
    "                adc = []\n",
    "                for k, a in enumerate(adc_datas):\n",
    "                    adc_ret[k].append(a[i])\n",
    "\n",
    "                xgm_res.append(xgm)\n",
    "                xgm_res_td.append(xgm_f_td[i])\n",
    "                if method == \"radial\":\n",
    "                    im = posarr[i, ...]                \n",
    "                    # Here we create histograms of the data in a polar coordinate system.\n",
    "                    # We use numpys hist2d function, giving it the polar coordinates of \n",
    "                    # each pixels, and weighing that coordinate with the pixel's value.\n",
    "                    # We obtain a histogram for each module, according to its position defined\n",
    "                    # in the coord_list.\n",
    "\n",
    "                    hs = []\n",
    "                    bins_nums = []\n",
    "                    edges = []\n",
    "\n",
    "                    goods = []\n",
    "                    bins = 1000\n",
    "\n",
    "                    dx, dy = -750, -750\n",
    "\n",
    "                    rho, phi, weights, good = mod_cart_to_pol(im, dy, dx, False)\n",
    "\n",
    "                    h, phi_edges, rho_edges, bns = binned_statistic_2d(phi, rho, weights, bins=(bins,bins),\n",
    "                                                                           range=((-np.pi, np.pi), (0, 1000)),\n",
    "                                                                           statistic = \"mean\")\n",
    "                    bins_nums.append(bns)\n",
    "                    hs.append(h)\n",
    "                    edges.append((phi_edges, rho_edges))\n",
    "                    goods.append(good)\n",
    "\n",
    "                    x = np.arange(bins)/bins*1000*500e-6\n",
    "                    y = np.arange(bins)/bins*2.\n",
    "                    ds = np.nansum(np.array(hs),axis=0)\n",
    "                    ds[ds == 0] = np.nan\n",
    "                    profile = np.nanmedian(ds, axis=0)\n",
    "\n",
    "                    max_im.append(np.nanmax(profile))\n",
    "                    sum_im.append(np.nansum(profile))\n",
    "                    max_idx = np.nanargmax(profile)\n",
    "\n",
    "                    hs = []\n",
    "                    bins_nums = []\n",
    "                    edges = []\n",
    "\n",
    "                    goods = []\n",
    "                    bins = 1000\n",
    "\n",
    "                    dx, dy = -750, -750\n",
    "\n",
    "                    rho, phi, weights, good = mod_cart_to_pol(garr[i,...], dy, dx, False)\n",
    "\n",
    "                    h, phi_edges, rho_edges, bns = binned_statistic_2d(phi, rho, weights, bins=(bins,bins),\n",
    "                                                                           range=((-np.pi, np.pi), (0, 1000)),\n",
    "                                                                           statistic = \"mean\")\n",
    "\n",
    "\n",
    "                    bins_nums.append(bns)\n",
    "                    hs.append(h)\n",
    "                    edges.append((phi_edges, rho_edges))\n",
    "                    goods.append(good)\n",
    "\n",
    "                    x = np.arange(bins)/bins*1000*500e-6\n",
    "                    y = np.arange(bins)/bins*2.\n",
    "                    ds = np.nansum(np.array(hs),axis=0)\n",
    "                    ds[ds == 0] = np.nan\n",
    "                    profile = np.nanmean(ds, axis=0)\n",
    "\n",
    "                    max_gain.append(profile[max_idx])\n",
    "                    mean_gain.append(np.nanmean(profile))\n",
    "\n",
    "                    hs = []\n",
    "                    bins_nums = []\n",
    "                    edges = []\n",
    "\n",
    "                    goods = []\n",
    "                    bins = 1000\n",
    "\n",
    "                    dx, dy = -750, -750\n",
    "\n",
    "                    rho, phi, weights, good = mod_cart_to_pol(rawarr[i,...], dy, dx, False)\n",
    "\n",
    "                    h, phi_edges, rho_edges, bns = binned_statistic_2d(phi, rho, weights, bins=(bins,bins),\n",
    "                                                                           range=((-np.pi, np.pi), (0, 1000)),\n",
    "                                                                           statistic = \"mean\")\n",
    "                    bins_nums.append(bns)\n",
    "                    hs.append(h)\n",
    "                    edges.append((phi_edges, rho_edges))\n",
    "                    goods.append(good)\n",
    "\n",
    "                    x = np.arange(bins)/bins*1000*500e-6\n",
    "                    y = np.arange(bins)/bins*2.\n",
    "                    ds = np.nansum(np.array(hs),axis=0)\n",
    "                    ds[ds == 0] = np.nan\n",
    "                    profile = np.nanmedian(ds, axis=0)\n",
    "\n",
    "                    raw_det.append(np.nanmax(profile))\n",
    "\n",
    "                elif method == \"average\":\n",
    "\n",
    "                    im = posarr[..., i]\n",
    "                    max_im.append(np.nanmedian(im))\n",
    "                    sum_im.append(np.nansum(im))\n",
    "\n",
    "                    max_gain.append(np.nanmean(garr[..., i]))\n",
    "                    mean_gain.append(np.nanmean(garr[..., i]))\n",
    "                    \n",
    "                    raw_det.append(np.nanmedian(rawarr[..., i]))\n",
    "    \n",
    "        np.savez(\"{}/{}_{}\".format(temp_out_folder, run, sequence),\n",
    "                 np.array(xgm_res), np.array(max_im), np.array(sum_im),\n",
    "                 np.array(max_gain), np.array(mean_gain), np.array(xgm_res_td),\n",
    "                 np.array(raw_det), np.array(adc_ret))\n",
    "    except:\n",
    "        pass"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "for run in runs_to_process:\n",
    "    seqs = get_sequences(proposal_folder, run, sequences)\n",
    "    par_handle = partial(handle_sequence, run, proposal_folder, xgm_folder,\n",
    "                         file_prefix, xgm_prefix, data_path, xgm_path, xgm_path_td, raw_prefix,\n",
    "                         geometry_file, d_quads, temp_out_folder, gain_path,\n",
    "                         adc_folder, adc_prefix, adc_root, adc_paths, method)\n",
    "    #ret = view.map_sync(par_handle, seqs)\n",
    "    ret = list(map(par_handle, seqs))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}