Skip to content
Snippets Groups Projects

Add warning if xfel-calibrate may not be reproducible

Merged — Philipp Schmidt requested to merge feat/non-reproducible-warning into master
2 unresolved threads
@@ -83,9 +83,18 @@ def make_initial_parser(**kwargs):
@@ -83,9 +83,18 @@ def make_initial_parser(**kwargs):
help='Filename (and optionally path) for output'
help='Filename (and optionally path) for output'
' report')
' report')
 
parser.add_argument('--not-reproducible', action='store_true',
 
help='Disable checks to allow the processing result '
 
'to not be reproducible based on its metadata.')
 
parser.add_argument('--skip-report', action='store_true',
parser.add_argument('--skip-report', action='store_true',
help='Skip report generation in finalize step.')
help='Skip report generation in finalize step.')
 
parser.add_argument('--skip-env-freeze', action='store_true',
 
help='Skip recording the Python environment for '
 
'reproducibility purposes, requires '
 
'--not-reproducible to run.')
 
parser.add_argument('--concurrency-par', type=str,
parser.add_argument('--concurrency-par', type=str,
help='Name of concurrency parameter.'
help='Name of concurrency parameter.'
'If not given, it is taken from configuration.')
'If not given, it is taken from configuration.')
@@ -981,6 +990,35 @@ def run():
@@ -981,6 +990,35 @@ def run():
caltype = args["type"].upper()
caltype = args["type"].upper()
sequential = args["no_cluster_job"]
sequential = args["no_cluster_job"]
 
# Pick out any arguments that may prevent reproducibility from
 
# working, sorted alphabetically and converted back to their
 
# canonical representation.
 
not_reproducible_args = sorted(
 
('--' + x.replace('_', '-')
 
for x in ['skip_env_freeze']
 
if args[x]))
 
 
# If any of these arguments are set, present a warning.
 
if not_reproducible_args:
 
print('WARNING: One or more command line arguments ({}) may prevent '
 
'this specific correction result from being reproducible based '
 
'on its metadata. It may not be possible to restore identical '
 
'output data files when they have been deleted or lost. Please '
 
'ensure that the data retention policy of the chosen storage '
 
'location is sufficient for your '
 
'needs.'.format(', '.join(not_reproducible_args)))
 
 
if not args['not_reproducible']:
 
# If not explicitly specified that reproducibility may be
 
# broken, remind the user and exit.
 
print('To proceed, you can explicitly allow reproducibility to '
 
'be broken by adding --not-reproducible')
 
sys.exit(1)
 
 
reproducible = False
 
else:
 
reproducible = True
 
try:
try:
nb_info = notebooks[detector][caltype]
nb_info = notebooks[detector][caltype]
except KeyError:
except KeyError:
@@ -1096,6 +1134,7 @@ def run():
@@ -1096,6 +1134,7 @@ def run():
metadata["pycalibration-version"] = version
metadata["pycalibration-version"] = version
metadata["report-path"] = f"{report_to}.pdf" if report_to \
metadata["report-path"] = f"{report_to}.pdf" if report_to \
else '# REPORT SKIPPED #'
else '# REPORT SKIPPED #'
 
metadata['reproducible'] = reproducible
metadata["concurrency"] = {
metadata["concurrency"] = {
'parameter': concurrency_par,
'parameter': concurrency_par,
'default': concurrency_defval,
'default': concurrency_defval,
@@ -1116,8 +1155,9 @@ def run():
@@ -1116,8 +1155,9 @@ def run():
metadata.save()
metadata.save()
# Record installed Python packages for reproducing the environment
# Record installed Python packages for reproducing the environment
with open(os.path.join(run_tmp_path, 'requirements.txt'), 'wb') as f:
if not args['skip_env_freeze']:
check_call([python_exe, '-m', 'pip', 'freeze'], stdout=f)
with open(os.path.join(run_tmp_path, 'requirements.txt'), 'wb') as f:
 
check_call([python_exe, '-m', 'pip', 'freeze'], stdout=f)
folder = get_par_attr(parms, 'in_folder', 'value', '')
folder = get_par_attr(parms, 'in_folder', 'value', '')
Loading