{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Mine XGM Data and LPD Profile #\n", "\n", "Author: S. Hauf, Version: 0.1" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "dc = [1.5, 1] # center offset\n", "geometry_file = \"/gpfs/exfel/d/cal/exchange/lpdMF_00.h5\" # the geometry file to use, MAR 2018\n", "proposal_folder = \"/gpfs/exfel/exp/FXE/201701/p002045/proc/\" # folder under which runs can be found\n", "xgm_folder = \"/gpfs/exfel/exp/FXE/201701/p002045/raw/\" # folder under which runs can be found\n", "adc_folder = \"/gpfs/exfel/exp/FXE/201701/p002045/raw/\" # folder under which runs can be found\n", "file_prefix = \"CORR-R{:04d}-LPD\" # prefix of each data file - should be a template to replace run number\n", "raw_prefix = \"RAW-R{:04d}-LPD\" # prefix of each data file - should be a template to replace run number\n", "xgm_prefix = \"RAW-R{:04d}-DA03\" # prefix for XGM file - should be a template to replace run number\n", "adc_prefix = \"RAW-R{:04d}-DA01\" # prefix for ADC file - should be a template to replace run number\n", "data_path = \"INSTRUMENT/FXE_DET_LPD1M-1/DET/{}CH0:xtdf/image/data\" # path to data in file\n", "gain_path = \"INSTRUMENT/FXE_DET_LPD1M-1/DET/{}CH0:xtdf/image/gain\" # path to data in file\n", "xgm_path = \"/INSTRUMENT/SA1_XTD2_XGM/DOOCS/MAIN:output/data/intensityTD\" # path to XGM data\n", "xgm_path_td = \"/INSTRUMENT/SA1_XTD2_XGM/DOOCS/MAIN:output/data/intensityAUXTD\" # path to XGM AUX data\n", "adc_root = \"/INSTRUMENT/FXE_RR_DAQ/ADC/1:network/digitizers\" # root path to ADC data\n", "adc_paths = [\"channel_1_A/apd/pulseIntegral\", \"channel_1_B/apd/pulseIntegral\",\n", " \"channel_1_C/apd/pulseIntegral\", \"channel_1_D/apd/pulseIntegral\"]\n", "temp_out_folder = \"/gpfs/exfel/data/scratch/haufs/test/tempout6\"\n", "cluster_profile = \"noDB\"\n", "chunk_size = 512 # read this amount of data at once\n", "runs = \"69-72\"\n", "method = \"average\" # method to use for evaluation of images: radial, average\n", "\n", "sequences = [-1] # range allowed" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [ "def create_run_list(proposal_folder, file_prefix, xgm_folder, xgm_prefix, runs):\n", " import glob\n", "\n", " from cal_tools.tools import parse_runs\n", " in_runs = parse_runs(runs, return_type=int)\n", " runs_to_process = []\n", " for r in in_runs:\n", " \n", " try:\n", " \n", " tl = glob.glob(\"{}/r{:04d}/{}*.h5\".format(proposal_folder, r, file_prefix.format(r)))\n", " tlxgm = glob.glob(\"{}/r{:04d}/{}*.h5\".format(xgm_folder, r, xgm_prefix.format(r)))\n", " \n", " if len(tl) and len(tlxgm):\n", " runs_to_process.append(r)\n", " except Exception as e:\n", " print(e)\n", " pass\n", " \n", " return runs_to_process" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "import warnings\n", "\n", "warnings.filterwarnings('ignore')\n", "import os\n", "\n", "import h5py\n", "import matplotlib\n", "import numpy as np\n", "\n", "matplotlib.use(\"agg\")\n", "import matplotlib.pyplot as plt\n", "\n", "%matplotlib inline\n", "from functools import partial\n", "\n", "from ipyparallel import Client\n", "\n", "print(\"Connecting to profile {}\".format(cluster_profile))\n", "view = Client(profile=cluster_profile)[:]\n", "view.use_dill()\n", "\n", "d_quads = [(-14+dc[0],-300+dc[1]),(10+dc[0],-9+dc[1]),(-256+dc[0],15+dc[1]),(-280+dc[0],-276+dc[1])] # MAR 2018\n", "#runs_to_process = create_run_list(proposal_folder, file_prefix, xgm_folder, xgm_prefix, runs)\n", "runs_to_process = runs\n", "\n", "if method not in [\"radial\", \"average\"]:\n", " print(\"Unknown method, defaulting to average\")\n", " method = \"average\"" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [ "def get_sequences(in_folder, run, sequences):\n", " import glob\n", " import re\n", "\n", " import numpy as np\n", " \n", " sequence_files = glob.glob(\"{}/r{:04d}/*-S*.h5\".format(in_folder, run))\n", " seq_nums = set()\n", " for sf in sequence_files:\n", " seqnum = re.findall(r\".*-S([0-9]*).h5\", sf)[0]\n", " seq_nums.add(int(seqnum))\n", " if len(sequences) and sequences[0] != -1:\n", " seq_nums &= set(sequences)\n", " \n", " return list(seq_nums)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "def handle_sequence(run, proposal_folder, xgm_folder,\n", " file_prefix, xgm_prefix, data_path, xgm_path, xgm_path_td, raw_prefix, \n", " geometry_file, d_quads, temp_out_folder, gain_path, \n", " adc_folder, adc_prefix, adc_root, adc_paths, method, sequence):\n", " import cal_tools.metrology as metro\n", " import h5py\n", " import numpy as np\n", " from scipy.stats import binned_statistic_2d\n", "\n", " # convert the Carthesian coordinates of the detector to polar coordinates\n", " def mod_cart_to_pol(d, dx, dy, filter_by_val=True):\n", " \"\"\" Convert carthesian coords to polar coords\n", " \"\"\"\n", " cx, cy = d.shape\n", " x = np.arange(cx)+dx\n", " y = np.arange(cy)+dy\n", " x = np.repeat(x[:,None], cy, axis=1)\n", " y = np.repeat(y[None,:], cx, axis=0)\n", "\n", " rho = np.sqrt(x**2 + y**2).flatten()\n", " phi = np.arctan2(y, x).flatten()\n", " flat = d.flatten()\n", "\n", " # we also perform a bit of filtering here\n", "\n", " if filter_by_val:\n", " good = np.isfinite(flat) & (flat > 0) & (flat < 1e5)\n", "\n", " return rho[good], phi[good], flat[good], good\n", "\n", " return rho, phi, flat, None\n", " \n", " \n", " \n", " xgm_file = \"{}/r{:04d}/{}-S{:05d}.h5\".format(xgm_folder, run, xgm_prefix.format(run), sequence)\n", " adc_file = \"{}/r{:04d}/{}-S{:05d}.h5\".format(adc_folder, run, adc_prefix.format(run), sequence) if adc_folder != \"none\" else None\n", " data_files = \"{}/r{:04d}/{}*-S{:05d}.h5\".format(proposal_folder, run, file_prefix.format(run), sequence)\n", " raw_files = \"{}/r{:04d}/{}*-S{:05d}.h5\".format(xgm_folder, run, raw_prefix.format(run), sequence)\n", " \n", " try:\n", " with h5py.File(xgm_file, \"r\") as f:\n", "\n", " xgm_tid = np.array(f[\"/INDEX/trainId\"])\n", "\n", " adc_datas = []\n", " if adc_file:\n", " with h5py.File(adc_file, \"r\") as f2:\n", " adc_tid = np.array(f2[\"/INDEX/trainId\"])\n", " intersected = np.intersect1d(xgm_tid, adc_tid)\n", " adc_idx = np.in1d(adc_tid, intersected)\n", " xgm_idx = np.in1d(xgm_tid, intersected)\n", " for p in adc_paths:\n", " print(\"ADC path: {}\".format(\"{}/{}\".format(adc_root, p)))\n", " adc_datas.append(f2[\"{}/{}\".format(adc_root, p)][adc_idx, ...])\n", "\n", " xgm_tid = xgm_tid[xgm_idx]\n", " xgm_d = np.array(f[xgm_path])[xgm_idx]\n", " xgm_d_td = np.array(f[xgm_path_td])[xgm_idx]\n", " else:\n", "\n", " xgm_d = np.array(f[xgm_path])\n", " xgm_d_td = np.array(f[xgm_path_td])\n", "\n", " if method == \"radial\":\n", " posarr, intersected, pcounts = metro.positionFileList(data_files, data_path, geometry_file, d_quads, trainIds=xgm_tid)\n", " garr, _, _ = metro.positionFileList(data_files, gain_path, geometry_file, d_quads, trainIds=xgm_tid)\n", " rawarr, _, _ = metro.positionFileList(raw_files, data_path, geometry_file, d_quads, trainIds=xgm_tid, nwa=True, max_counts=16)\n", " elif method == \"average\":\n", " posarr, intersected, pcounts = metro.matchedFileList(data_files, data_path, trainIds=xgm_tid)\n", " garr, _, _ = metro.matchedFileList(data_files, gain_path, trainIds=xgm_tid)\n", " rawarr, _, _ = metro.matchedFileList(raw_files, data_path, trainIds=xgm_tid, nwa=True, max_counts=63)\n", " rawarr = np.moveaxis(rawarr, 3, 4)\n", " print(posarr.shape)\n", " xgm_d = xgm_d[np.in1d(xgm_tid, intersected), ...]\n", " xgm_d_td = xgm_d_td[np.in1d(xgm_tid, intersected), ...]\n", " xgm_f = []\n", " xgm_f_td = []\n", " adc_datas_f = []\n", " for i in range(len(adc_datas)):\n", " adc_datas_f.append([])\n", " for i in range(pcounts.size):\n", " xgm_f += xgm_d[i, :pcounts[i]].flatten().tolist()\n", " xgm_f_td += xgm_d_td[i, :pcounts[i]].flatten().tolist()\n", " for k in range(len(adc_datas)):\n", " adc_datas_f[k] += adc_datas[k][i, :pcounts[i]].flatten().tolist()\n", " xgm_d = np.array(xgm_f)\n", " xgm_f_td = np.array(xgm_f_td)\n", " for i in range(len(adc_datas)):\n", " adc_datas[i] = np.array(adc_datas_f[i])\n", " xgm_res = []\n", " xgm_res_td = []\n", " max_im = []\n", " sum_im = []\n", " max_gain = []\n", " mean_gain = []\n", " raw_det = []\n", " adc_ret = []\n", " for i in range(len(adc_datas)):\n", " adc_ret.append([])\n", " print(\"Processing {} images, XGM size: {}, ADC size: {}\".format(posarr.shape[0], xgm_d.size, adc_datas[0].size))\n", " print(\"Data shape {}, {}\".format(posarr.shape, rawarr.shape))\n", " for i in range(posarr.shape[0] if method==\"radial\" else posarr.shape[-1]):\n", "\n", " xgm = xgm_d[i]\n", " adc = []\n", " for k, a in enumerate(adc_datas):\n", " adc_ret[k].append(a[i])\n", "\n", " xgm_res.append(xgm)\n", " xgm_res_td.append(xgm_f_td[i])\n", " if method == \"radial\":\n", " im = posarr[i, ...] \n", " # Here we create histograms of the data in a polar coordinate system.\n", " # We use numpys hist2d function, giving it the polar coordinates of \n", " # each pixels, and weighing that coordinate with the pixel's value.\n", " # We obtain a histogram for each module, according to its position defined\n", " # in the coord_list.\n", "\n", " hs = []\n", " bins_nums = []\n", " edges = []\n", "\n", " goods = []\n", " bins = 1000\n", "\n", " dx, dy = -750, -750\n", "\n", " rho, phi, weights, good = mod_cart_to_pol(im, dy, dx, False)\n", "\n", " h, phi_edges, rho_edges, bns = binned_statistic_2d(phi, rho, weights, bins=(bins,bins),\n", " range=((-np.pi, np.pi), (0, 1000)),\n", " statistic = \"mean\")\n", " bins_nums.append(bns)\n", " hs.append(h)\n", " edges.append((phi_edges, rho_edges))\n", " goods.append(good)\n", "\n", " x = np.arange(bins)/bins*1000*500e-6\n", " y = np.arange(bins)/bins*2.\n", " ds = np.nansum(np.array(hs),axis=0)\n", " ds[ds == 0] = np.nan\n", " profile = np.nanmedian(ds, axis=0)\n", "\n", " max_im.append(np.nanmax(profile))\n", " sum_im.append(np.nansum(profile))\n", " max_idx = np.nanargmax(profile)\n", "\n", " hs = []\n", " bins_nums = []\n", " edges = []\n", "\n", " goods = []\n", " bins = 1000\n", "\n", " dx, dy = -750, -750\n", "\n", " rho, phi, weights, good = mod_cart_to_pol(garr[i,...], dy, dx, False)\n", "\n", " h, phi_edges, rho_edges, bns = binned_statistic_2d(phi, rho, weights, bins=(bins,bins),\n", " range=((-np.pi, np.pi), (0, 1000)),\n", " statistic = \"mean\")\n", "\n", "\n", " bins_nums.append(bns)\n", " hs.append(h)\n", " edges.append((phi_edges, rho_edges))\n", " goods.append(good)\n", "\n", " x = np.arange(bins)/bins*1000*500e-6\n", " y = np.arange(bins)/bins*2.\n", " ds = np.nansum(np.array(hs),axis=0)\n", " ds[ds == 0] = np.nan\n", " profile = np.nanmean(ds, axis=0)\n", "\n", " max_gain.append(profile[max_idx])\n", " mean_gain.append(np.nanmean(profile))\n", "\n", " hs = []\n", " bins_nums = []\n", " edges = []\n", "\n", " goods = []\n", " bins = 1000\n", "\n", " dx, dy = -750, -750\n", "\n", " rho, phi, weights, good = mod_cart_to_pol(rawarr[i,...], dy, dx, False)\n", "\n", " h, phi_edges, rho_edges, bns = binned_statistic_2d(phi, rho, weights, bins=(bins,bins),\n", " range=((-np.pi, np.pi), (0, 1000)),\n", " statistic = \"mean\")\n", " bins_nums.append(bns)\n", " hs.append(h)\n", " edges.append((phi_edges, rho_edges))\n", " goods.append(good)\n", "\n", " x = np.arange(bins)/bins*1000*500e-6\n", " y = np.arange(bins)/bins*2.\n", " ds = np.nansum(np.array(hs),axis=0)\n", " ds[ds == 0] = np.nan\n", " profile = np.nanmedian(ds, axis=0)\n", "\n", " raw_det.append(np.nanmax(profile))\n", "\n", " elif method == \"average\":\n", "\n", " im = posarr[..., i]\n", " max_im.append(np.nanmedian(im))\n", " sum_im.append(np.nansum(im))\n", "\n", " max_gain.append(np.nanmean(garr[..., i]))\n", " mean_gain.append(np.nanmean(garr[..., i]))\n", " \n", " raw_det.append(np.nanmedian(rawarr[..., i]))\n", " \n", " np.savez(\"{}/{}_{}\".format(temp_out_folder, run, sequence),\n", " np.array(xgm_res), np.array(max_im), np.array(sum_im),\n", " np.array(max_gain), np.array(mean_gain), np.array(xgm_res_td),\n", " np.array(raw_det), np.array(adc_ret))\n", " except:\n", " pass" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "for run in runs_to_process:\n", " seqs = get_sequences(proposal_folder, run, sequences)\n", " par_handle = partial(handle_sequence, run, proposal_folder, xgm_folder,\n", " file_prefix, xgm_prefix, data_path, xgm_path, xgm_path_td, raw_prefix,\n", " geometry_file, d_quads, temp_out_folder, gain_path,\n", " adc_folder, adc_prefix, adc_root, adc_paths, method)\n", " #ret = view.map_sync(par_handle, seqs)\n", " ret = list(map(par_handle, seqs))" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.6" } }, "nbformat": 4, "nbformat_minor": 1 }