def map_modules_from_folder(in_folder, run, path_template, karabo_da,
                            sequences=None):
    """
    Prepare queues of files to process.

    Queues are stored in a dictionary with module name Q{}M{} as a key.
    The module name is derived from the last two characters of each data
    aggregator name, which are parsed as the integer module index.

    :param in_folder: Input folder with raw data
    :param run: Run number
    :param path_template: Template for file name
                          e.g. `RAW-R{:04d}-{}-S{:05d}.h5`
    :param karabo_da: List of data aggregators e.g. [AGIPD00, AGIPD01]
    :param sequences: List of sequences to be considered. If None, every
                      file matching the template with a wildcard sequence
                      is used; otherwise only listed sequences whose file
                      exists are used.
    :return: Dictionary of queues of files, dictionary of module indexes,
             total number of sequences, dictionary of number of sequences
             per module, total size in bytes of all queued files
    """
    module_files = OrderedDict()
    mod_ids = OrderedDict()
    total_sequences = 0
    total_file_size = 0
    sequences_qm = {}
    for inset in karabo_da:
        module_idx = int(inset[-2:])
        name = f"Q{module_idx // 4 + 1}M{module_idx % 4 + 1}"
        module_files[name] = Queue()
        sequences_qm[name] = 0
        mod_ids[name] = module_idx

        if sequences is None:
            # Format with sequence 0, then turn the sequence field into a
            # wildcard so that all sequences of this module are matched.
            fname = path_template.format(run, inset, 0)
            fname = fname.replace("S00000", "S*")
            abs_fname = "{}/r{:04d}/{}".format(in_folder, run, fname)
            # glob() returns matches in arbitrary order; sort them so the
            # queue content is deterministic.
            found = sorted(glob(abs_fname))
        else:
            candidates = (
                "{}/r{:04d}/{}".format(
                    in_folder, run, path_template.format(run, inset, seq))
                for seq in sequences
            )
            # Requested sequences without a file on disk are skipped.
            found = [fname for fname in candidates if isfile(fname)]

        # Single accounting loop for both branches: the size is always
        # taken from the very file that was queued.
        for filename in found:
            module_files[name].put(filename)
            total_sequences += 1
            sequences_qm[name] += 1
            total_file_size += path.getsize(filename)

    return (module_files, mod_ids, total_sequences,
            sequences_qm, total_file_size)


def map_gain_stages(in_folder, runs, path_template, karabo_da, sequences=None):
    """
    Prepare queues of files to process.

    Queues are stored in a dictionary with the gain name as key; each value
    is the dictionary of per-module queues (module name Q{}M{} as key)
    returned by `map_modules_from_folder` for the run of that gain stage.

    :param in_folder: Input folder with raw data
    :param runs: Dictionary of runs with key naming the gain stages
    :param path_template: Template for file name
                          e.g. `RAW-R{:04d}-{}-S{:05d}.h5`
    :param karabo_da: List of data aggregators e.g. [AGIPD00, AGIPD01]
    :param sequences: List of sequences to be considered
    :return: Dictionary of queues of files,
             total number of sequences,
             total file size in bytes divided by 1e9 (i.e. in GB)
    """
    total_sequences = 0
    total_file_size = 0
    gain_mapped_files = OrderedDict()
    for gain, run in runs.items():
        mapped_files, _, seq, _, fs = map_modules_from_folder(in_folder, run,
                                                              path_template,
                                                              karabo_da,
                                                              sequences)

        total_sequences += seq
        total_file_size += fs
        gain_mapped_files[gain] = mapped_files
    return gain_mapped_files, total_sequences, total_file_size / 1e9