diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 01a2d7ac0154824d8f1ac0cf6ef7650d5e313020..5fe0821291a7b1305fa22bbd29f160a52fed9024 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -68,7 +68,7 @@ automated_test: - python3 -m pip install ".[automated_test]" - echo "Running automated test. This can take sometime to finish depending on the test data." - echo "Given variables are REFERENCE=$REFERENCE, OUTPUT=$OUTPUT, DETECTORS=$DETECTORS, CALIBRATION=$CALIBRATION" - - python3 -m pytest ./tests/test_reference_runs --color yes --verbose --release-test --reference-folder /gpfs/exfel/data/scratch/xcaltst/test/$REFERENCE --out-folder /gpfs/exfel/data/scratch/xcaltst/test/$OUTPUT --detectors $DETECTORS --calibration $CALIBRATION --find-difference + - python3 -m pytest ./tests/test_reference_runs --color yes --verbose --release-test --reference-folder /gpfs/exfel/data/scratch/xcaltst/test/$REFERENCE --out-folder /gpfs/exfel/data/scratch/xcaltst/test/$OUTPUT --detectors $DETECTORS --calibration $CALIBRATION timeout: 24 hours cython-editable-install-test: diff --git a/tests/test_reference_runs/conftest.py b/tests/test_reference_runs/conftest.py index ce3b9745888821e5e75fe4c6ef02e76a35047c56..c2c50991ea1806d3454a79a8169bc1f324cef2ec 100644 --- a/tests/test_reference_runs/conftest.py +++ b/tests/test_reference_runs/conftest.py @@ -38,15 +38,6 @@ def pytest_addoption(parser): "apply validation test on numerical data only."), ) - parser.addoption( - "--find-difference", - action="store_true", - default=False, - help=( - "In case of non numerical validation of h5file. " - "Find the different attribute and fail on the first one."), - ) - parser.addoption( "--use-slurm", action="store_true", @@ -80,8 +71,6 @@ def release_test_config(request): "--no-numerical-validation") validate_only = request.config.getoption( "--validation-only") - find_difference = request.config.getoption( - "--find-difference") use_slurm = request.config.getoption( "--use-slurm") picked_test = request.config.getoption("--picked-test") @@ -90,8 +79,7 @@ def release_test_config(request): return ( detectors, calibration, picked_test, skip_numerical_validation, validate_only, - find_difference, use_slurm, reference_folder, - out_folder, + use_slurm, reference_folder, out_folder, ) diff --git a/tests/test_reference_runs/test_pre_deployment.py b/tests/test_reference_runs/test_pre_deployment.py index b1f99ce02fadcb4a5d3d44291d7abf01b93caa1e..30ec362b291f0684dbaf5c1507f2c62253011152 100644 --- a/tests/test_reference_runs/test_pre_deployment.py +++ b/tests/test_reference_runs/test_pre_deployment.py @@ -23,236 +23,6 @@ from .callab_tests import automated_test_config LOGGER = logging.getLogger(__name__) -def file_md5( - tested_file: str, - block_size: int = 2 ** 20, -) -> bytes: - """Generating MD5 checksum for a file. - - Args: - tested_file: File to be tested. - block_size (_type_, optional): Block size for reading the file. - Defaults to 2**20. - """ - f = open(tested_file, "rb") - md5 = hashlib.md5() - while True: - data = f.read(block_size) - if not data: - break - md5.update(data) - f.close() - return md5.digest() - - -def collect_attrs(groups, datasets, objects, exclude_attrs, name, node): - """Collect h5 attrs in groups, datasets, and objects lists.""" - if node.name not in exclude_attrs: - if isinstance(node, h5py.Group): - groups.append(name) - elif isinstance(node, h5py.Dataset): - if node.dtype == 'object': - objects.append(name) - else: - datasets.append(name) - - -def compare_datasets( - file1, - file2, - datasets: list -): - """Compare the values of datasets in two h5 files.""" - h5_diff = [] - for d in datasets: - try: - if not np.allclose(file1[d][()], file2[d][()], equal_nan=True): - h5_diff.append(d) - except ValueError as e: - LOGGER.error(f"ValueError: {e}, {d}") - h5_diff.append(d) - except AttributeError as e: - LOGGER.error(f"AttributeError: {e}, {d}") - h5_diff.append(d) - return h5_diff - - -def compare_objects( - file1, - file2, - objects: list -): - """Compare the objects in two h5 files.""" - h5_diff = [] - for d in objects: - try: - if isinstance(file1[d][()], bytes): - if ( - file1[d][()].decode('utf-8') != file2[d][()].decode('utf-8') # noqa - ): - h5_diff.append(d) - elif ( - file1[d][()].dtype != file1[d][()].dtype and - not file1[d][()] != file2[d][()] - ): # pnccd files has only list of bytes - h5_diff.append(d) - except ValueError as e: - LOGGER.error(f"ValueError: {e}, {d}") - h5_diff.append(d) - except AttributeError as e: - LOGGER.error(f"AttributeError: {e}, {d}, " - f"{file1[d][()].decode('utf-8')}") - h5_diff.append(d) - return h5_diff - - -def find_differences( - test_file, - reference_file, - exclude_attrs, -): - """ - Find difference in groups, datasets, and objects between two h5files. - Args: - file1: first h5 file. - file2: second h5 file. - """ - - groups_f1 = [] - datasets_f1 = [] - objects_f1 = [] - - groups_f2 = [] - datasets_f2 = [] - objects_f2 = [] - - with h5py.File(test_file, 'r') as file1, h5py.File(reference_file, 'r') as file2: # noqa - - # Fill groups, datasets, and objects list - # to compare both h5files' attrs. - file1.visititems( - partial( - collect_attrs, - groups_f1, - datasets_f1, - objects_f1, - exclude_attrs, - )) - file2.visititems( - partial( - collect_attrs, - groups_f2, - datasets_f2, - objects_f2, - exclude_attrs, - )) - - start_time = time.perf_counter() - # Compare groups, datasets, and objects to have the same content. - assert set(groups_f1) == set(groups_f2), f"{test_file} and {reference_file} consists of different groups." # noqa - assert set(datasets_f1) == set(datasets_f2), f"{test_file} and {reference_file} consists of different datasets." # noqa - assert set(objects_f1) == set(objects_f2), f"{test_file} and {reference_file} consists of different datasets." # noqa - duration = time.perf_counter() - start_time - LOGGER.debug("Elapsed time comparing groups, " - f"datasets, and objects: {duration} seconds") - LOGGER.debug("Groups, datasets, and objects have the same content.") - - # Compare datasets and objects. - start_time = time.perf_counter() - h5_diff_datasets = compare_datasets(file1, file2, datasets_f1) - duration = time.perf_counter() - start_time - LOGGER.debug(f"Elapsed time comparing datasets: {duration} seconds") - start_time = time.perf_counter() - h5_diff_objects = compare_objects(file1, file2, objects_f1) - LOGGER.debug(f"Elapsed time comparing objects: {duration} seconds") - - assert not h5_diff_datasets, f"{[d for d in h5_diff_datasets]} datasets contain different values for {test_file} and {reference_file}" # noqa - LOGGER.debug("Datasets are validated.") - assert not h5_diff_objects, f"{[d for d in h5_diff_objects]} objects contain different values for {test_file} and {reference_file}" # noqa - LOGGER.debug("Objects are validated.") - - -def validate_files( - ref_folder: pathlib.PosixPath, - out_folder: pathlib.PosixPath, - exclude_attrs: list, - test_file: pathlib.PosixPath, -) -> Tuple[bool, pathlib.PosixPath]: - """Validate file similarities. Create temporary files to exclude - h5 attributes known to be different. e.g `report` for constants. - If both files are not identical, the function is able to loop over - both files and find and fail on the difference. - - Args: - ref_folder: The reference folder for validating the files - out_folder: The output folder for the test constant files. - test_file: The output file to be validated. - exclude_attrs: A list of datasets, groups to exclude - from validated files. - Returns: - result: validation result for metadata. - test_file: The validated file. - """ - import h5py - start_validating = time.perf_counter() - - def exclude_sources(source_file, dest, excluded_sources): - # Open the source file in read-only mode - with h5py.File(source_file, 'r') as source: - - # Recursively visit all objects in the source file - def visit_func(name, obj): - # Check if the object should be excluded - if name in excluded_sources: - return - - # Check if the object is a dataset - if isinstance(obj, h5py.Dataset): - # Create a new dataset in the destination - # file and copy the data - dest.create_dataset(name, data=obj[()]) - - # Visit all objects in the source file and - # copy them to the destination file - source.visititems(visit_func) - - with tempfile.NamedTemporaryFile( - dir=out_folder, - suffix=".tmp", - prefix="cal_", - delete=True, - ) as out_tf, tempfile.NamedTemporaryFile( - dir=out_folder, - suffix=".tmp", - prefix="cal_", - delete=True, - ) as ref_tf: - - # Create in-memory HDF5 files for validation - with h5py.File(out_tf.name, 'a') as hp1, h5py.File(ref_tf.name, 'a') as hp2: # noqa - - start_time = time.perf_counter() - # Copy h5 files for validation and exclude selected attrs. - exclude_sources(test_file, hp1, exclude_attrs) - - duration = time.perf_counter() - start_time - LOGGER.debug(f"Elapsed time copying {test_file}: " - f"{duration} seconds") - - start_time = time.perf_counter() - exclude_sources(ref_folder / test_file.name, hp2, exclude_attrs) - - duration = time.perf_counter() - start_time - LOGGER.debug(f"Elapsed time copying {ref_folder / test_file.name}: " - f"{duration} seconds") - - start_time = time.perf_counter() - result = file_md5(out_tf.name) == file_md5(ref_tf.name) - LOGGER.debug(f"MD5 validation for {test_file}: {duration} seconds") - duration = time.perf_counter() - start_validating - return result, test_file - - @dataclass class ComparisonResult: filename: str @@ -475,7 +245,7 @@ def test_xfel_calibrate( ( detectors, calibration, picked_test, - skip_numerical_validation, only_validate, find_difference, + skip_numerical_validation, only_validate, use_slurm, reference_dir_base, out_dir_base, ) = release_test_config @@ -520,7 +290,6 @@ def test_xfel_calibrate( out_folder, reference_folder, cal_type, - find_difference, ) return @@ -558,5 +327,4 @@ def test_xfel_calibrate( out_folder, reference_folder, cal_type, - find_difference, )