Configuration
The European XFEL Offline Calibration tools are configured using the settings.py and notebooks.py files, both of which can be found in the root directory. The settings.py file adapts the tools to the environment they run in; the notebooks.py file configures which notebooks are exposed on the command line.
Settings
The settings.py file configures the environment the tools are run in. It is a normal Python file of the form:
import os

# path into which temporary files from each run are placed
temp_path = "{}/temp/".format(os.getcwd())
# path to use for calling Python. If the environment is set up correctly,
# this can simply be the command
python_path = "python"
# path to store reports in
report_path = "{}/calibration_reports/".format(os.getcwd())
# also try to output the report to an out_folder defined by the notebook
try_report_to_output = True
# the command to run jobs concurrently. It is prepended to the actual call
launcher_command = "sbatch -p exfel -t 24:00:00 --mem 500G --mail-type END --requeue --output {temp_path}/slurm-%j.out"
The meaning of each configuration parameter is given in the comment above it.
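The launcher_command contains a {temp_path} placeholder. As a minimal sketch of how such a template might be expanded before a job is submitted (the script name and the exact assembly logic here are hypothetical, not the tool's actual code):

```python
# Hypothetical sketch: fill the {temp_path} placeholder with str.format
# and append the notebook-execution command. The real assembly is done
# inside xfel-calibrate itself.
temp_path = "/tmp/xfel_calibrate/temp"
launcher_command = ("sbatch -p exfel -t 24:00:00 --mem 500G "
                    "--mail-type END --requeue "
                    "--output {temp_path}/slurm-%j.out")

# str.format only touches {...} placeholders, so the slurm %j token
# passes through untouched.
cmd = launcher_command.format(temp_path=temp_path).split()
cmd += ["./run_notebook.sh"]  # hypothetical script run per job
print(cmd[0])   # sbatch
```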
Notebooks
The xfel-calibrate tool will expose any notebooks that are configured here to the command line by automatically parsing the parameters given in the notebook's first cell. The configuration is given in the form of a Python dictionary:
notebooks = {
    "AGIPD": {
        "DARK": {
            "notebook": "AGIPD/Characterize_AGIPD_Gain_Darks_NBC.ipynb",
            "concurrency": {"parameter": "modules",
                            "default concurrency": 16,
                            "cluster cores": 16},
        },
        "PC": {
            "notebook": "AGIPD/Chracterize_AGIPD_Gain_PC_NBC.ipynb",
            "concurrency": {"parameter": "modules",
                            "default concurrency": 16,
                            "cluster cores": 16},
        },
        "CORRECT": {
            "notebook": "notebooks/AGIPD/AGIPD_Correct_and_Verify.ipynb",
            "concurrency": {"parameter": "sequences",
                            "use function": "balance_sequences",
                            "default concurrency": [-1],
                            "cluster cores": 32},
        },
        ...
    }
}
The first key is the detector that the calibration may be used for, here AGIPD. The second key level gives the name of the task being performed (here: DARK and PC). For each of these entries, a path to the notebook and a concurrency hint should be given. In the concurrency hint, the parameter entry specifies which notebook parameter expects a list whose entries can be run concurrently (here "modules"). The default concurrency entry states the range with which to fill this parameter if it is not given by the user. In the example, a range(16) := 0, 1, 2, ..., 15 would be passed to the notebook, which is then run as 16 concurrent jobs, each processing one module. Finally, cluster cores gives a hint for the number of cluster cores to request. This value should be derived e.g. by profiling memory usage per core, run times, etc.
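As a rough illustration (not the actual implementation), expanding an integer default concurrency into per-job parameter values could look like this:

```python
# Sketch of how an integer "default concurrency" hint could be turned
# into one job per value. All names here are illustrative.
concurrency = {"parameter": "modules",
               "default concurrency": 16,
               "cluster cores": 16}

user_value = None  # the user did not pass --modules on the command line

# An integer default is interpreted as range(n): one job per entry.
n = concurrency["default concurrency"]
job_values = user_value if user_value is not None else list(range(n))

# One cluster job is launched per entry, each seeing a single module.
jobs = [{concurrency["parameter"]: [v]} for v in job_values]
print(len(jobs))  # 16
print(jobs[0])    # {'modules': [0]}
```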
Note
It is good practice to name command-line-enabled notebooks with an _NBC suffix, as shown in the above example.
The CORRECT notebook (the last notebook in the example) makes use of a concurrency-generating function by setting the use function parameter. This function must be defined in a code cell in the notebook, and its parameters should be named like other exposed parameters. It should return a list of parameter values to be inserted into the concurrently run notebooks. The example defines the balance_sequences function:
def balance_sequences(in_folder, run, sequences, sequences_per_node):
    import glob
    import re
    import numpy as np

    if sequences_per_node != 0:
        sequence_files = glob.glob("{}/r{:04d}/*-S*.h5".format(in_folder, run))
        seq_nums = set()
        for sf in sequence_files:
            seqnum = re.findall(r".*-S([0-9]*).h5", sf)[0]
            seq_nums.add(int(seqnum))
        seq_nums -= set(sequences)
        return [l.tolist() for l in np.array_split(list(seq_nums),
                                                   len(seq_nums)//sequences_per_node + 1)]
    else:
        return sequences
Note
Note how imports are inlined in the definition. This is necessary, as only the function code, not the entire notebook, is executed.
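To see the splitting step in isolation, here is a standalone sketch of the np.array_split call used above, with made-up sequence numbers:

```python
import numpy as np

# Standalone illustration of the chunking in balance_sequences:
# 10 sequence numbers with at most 2 per node yield 6 chunks, with
# np.array_split producing smaller trailing chunks as needed.
seq_nums = list(range(10))
sequences_per_node = 2
chunks = [c.tolist()
          for c in np.array_split(seq_nums,
                                  len(seq_nums)//sequences_per_node + 1)]
print(chunks)  # [[0, 1], [2, 3], [4, 5], [6, 7], [8], [9]]
```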
This in turn requires exposed parameters such as:
in_folder = "/gpfs/exfel/exp/SPB/201701/p002038/raw/" # the folder to read data from, required
run = 239 # runs to process, required
sequences = [-1] # sequences to correct, set to -1 for all, range allowed
sequences_per_node = 2 # number of sequence files per cluster node if run as slurm job, set to 0 to not run SLURM parallel
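A much-simplified sketch of how the assignments in such a first cell could be collected is shown below. The real parser in xfel-calibrate is more elaborate (whether it also uses the trailing comments as help text is an assumption here, not confirmed by this document):

```python
import ast

# Simplified, illustrative parser for a notebook's first cell:
# collect top-level "name = literal" assignments into a dict.
first_cell = '''
in_folder = "/gpfs/exfel/exp/SPB/201701/p002038/raw/"  # the folder to read data from, required
run = 239  # runs to process, required
sequences = [-1]  # sequences to correct, set to -1 for all
'''

params = {}
for node in ast.parse(first_cell).body:
    if isinstance(node, ast.Assign) and isinstance(node.targets[0], ast.Name):
        # literal_eval safely evaluates constants, lists, negative numbers, ...
        params[node.targets[0].id] = ast.literal_eval(node.value)

print(params["run"])   # 239
print(sorted(params))  # ['in_folder', 'run', 'sequences']
```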
Note
The function only needs to be defined, but not executed within the notebook context itself.