diff --git a/src/xfel_calibrate/calibrate.py b/src/xfel_calibrate/calibrate.py index 9c130b403fcb5f5f64fd550631f091681a51e361..b44252144dbc8d59654534c226d153a226668959 100755 --- a/src/xfel_calibrate/calibrate.py +++ b/src/xfel_calibrate/calibrate.py @@ -83,9 +83,18 @@ def make_initial_parser(**kwargs): help='Filename (and optionally path) for output' ' report') + parser.add_argument('--not-reproducible', action='store_true', + help='Disable checks to allow the processing result ' + 'to not be reproducible based on its metadata.') + parser.add_argument('--skip-report', action='store_true', help='Skip report generation in finalize step.') + parser.add_argument('--skip-env-freeze', action='store_true', + help='Skip recording the Python environment for ' + 'reproducibility purposes, requires ' + '--not-reproducible to run.') + parser.add_argument('--concurrency-par', type=str, help='Name of concurrency parameter.' 'If not given, it is taken from configuration.') @@ -981,6 +990,35 @@ def run(): caltype = args["type"].upper() sequential = args["no_cluster_job"] + # Pick out any arguments that may prevent reproducibility from + # working, sorted alphabetically and converted back to their + # canonical representation. + not_reproducible_args = sorted( + ('--' + x.replace('_', '-') + for x in ['skip_env_freeze'] + if args[x])) + + # If any of these arguments are set, present a warning. + if not_reproducible_args: + print('WARNING: One or more command line arguments ({}) may prevent ' + 'this specific correction result from being reproducible based ' + 'on its metadata. It may not be possible to restore identical ' + 'output data files when they have been deleted or lost. Please ' + 'ensure that the data retention policy of the chosen storage ' + 'location is sufficient for your ' + 'needs.'.format(', '.join(not_reproducible_args))) + + if not args['not_reproducible']: + # If not explicitly specified that reproducibility may be + # broken, remind the user and exit. + print('To proceed, you can explicitly allow reproducibility to ' + 'be broken by adding --not-reproducible') + sys.exit(1) + + reproducible = False + else: + reproducible = True + try: nb_info = notebooks[detector][caltype] except KeyError: @@ -1096,6 +1134,7 @@ def run(): metadata["pycalibration-version"] = version metadata["report-path"] = f"{report_to}.pdf" if report_to \ else '# REPORT SKIPPED #' + metadata['reproducible'] = reproducible metadata["concurrency"] = { 'parameter': concurrency_par, 'default': concurrency_defval, @@ -1116,8 +1155,9 @@ def run(): metadata.save() # Record installed Python packages for reproducing the environment - with open(os.path.join(run_tmp_path, 'requirements.txt'), 'wb') as f: - check_call([python_exe, '-m', 'pip', 'freeze'], stdout=f) + if not args['skip_env_freeze']: + with open(os.path.join(run_tmp_path, 'requirements.txt'), 'wb') as f: + check_call([python_exe, '-m', 'pip', 'freeze'], stdout=f) folder = get_par_attr(parms, 'in_folder', 'value', '')