diff --git a/setup.py b/setup.py
index 58d5774d9f3223f9c5ef8a8a2890fb420f3b3e2a..f0a8b0418959fd06b177fae7e29abbb852a51ca0 100644
--- a/setup.py
+++ b/setup.py
@@ -36,7 +36,8 @@ setup(
     entry_points={
         'console_scripts': [
             'exdf-glance = exdf.cli.glance:main',
-            'exdf-reduce = exdf.cli.reduce:main'
+            'exdf-reduce = exdf.cli.reduce:main',
+            'exdf-compare = exdf.cli.compare:main'
         ],
 
         'exdf.data_reduction.method': [
diff --git a/src/exdf/cli/compare.py b/src/exdf/cli/compare.py
new file mode 100644
index 0000000000000000000000000000000000000000..73d2e82c5b7499c560ff7f7e4ed696577c7f5e0a
--- /dev/null
+++ b/src/exdf/cli/compare.py
@@ -0,0 +1,228 @@
+
+from argparse import ArgumentParser
+from pathlib import Path
+
+import numpy as np
+
+from extra_data import RunDirectory, by_id, by_index
+
+
+# Print switches consulted by cmp(); main() reassigns both after parsing.
+show_true_cond = show_false_cond = True
+# Running totals of comparisons that came out true / false (see cmp()).
+num_true_cond = num_false_cond = 0
+
+
+def cmp(label, cond):
+    """Tally cond as equal or unequal, print label if enabled, return cond."""
+    global num_true_cond, num_false_cond
+
+    # Unequal result: count it and report it unless failures are muted.
+    if not cond:
+        num_false_cond += 1
+        if show_false_cond:
+            print(f'❌ {label}')
+        return cond
+
+    num_true_cond += 1
+    if show_true_cond:
+        print(f'✅ {label}')
+    return cond
+
+
+def main(argv=None):
+    """Compare two EXDF data collections and report how they differ.
+
+    Parses argv (argparse reads sys.argv when it is None), opens both
+    inputs with RunDirectory, applies the requested source/key and train
+    selections and passes every check through cmp(). Finishes with a
+    summary line, or with just the number of unequal items under --quiet.
+    Returns None.
+    """
+    ap = ArgumentParser(
+        description='Compare data collections structured in the '
+                    'European XFEL Data Format (EXDF).')
+
+    ap.add_argument(
+        'input1', metavar='INPUT1', type=Path,
+        help='folder of input data to compare with INPUT2')
+
+    ap.add_argument(
+        'input2', metavar='INPUT2', type=Path,
+        help='folder of input data to compare with INPUT1')
+
+    output_group = ap.add_mutually_exclusive_group()
+
+    output_group.add_argument(
+        '--verbose', '-v', action='store_true',
+        help='whether to show all compared items and not only the different '
+             'ones')
+
+    output_group.add_argument(
+        '--quiet', '-q', action='store_true',
+        help='whether to only print the count of unequal items')
+
+    select_group = ap.add_argument_group(
+        'Selection arguments',
+        'Allows to select only part of the data collections before comparing.')
+
+    src_select_group = select_group.add_mutually_exclusive_group()
+
+    src_select_group.add_argument(
+        '--select',
+        metavar='SRC,KEY', action='store', type=str, nargs='*',
+        help='only compare the data collection after selecting specified '
+             'sources and/or keys')
+
+    src_select_group.add_argument(
+        '--deselect',
+        metavar='SRC,KEY', action='store', type=str, nargs='*',
+        help='only compare the data collection after deselecting specified '
+             'sources and/or keys')
+
+    train_select_group = select_group.add_mutually_exclusive_group()
+
+    train_select_group.add_argument(
+        '--trains-by-id',
+        metavar='SLICE_EXPR', action='store', type=str,
+        help='only compare the data collection after selecting specified '
+             'trains by ID')
+
+    train_select_group.add_argument(
+        '--trains-by-index',
+        metavar='SLICE_EXPR', action='store', type=str,
+        help='only compare the data collection after selecting specified '
+             'trains by index')
+
+    scope_group = ap.add_argument_group(
+        'Scope of comparison arguments',
+        'Allows to restrict the scope to which the data collections are '
+        'compared with by default includes everything including the data '
+        'itself.'
+    ).add_mutually_exclusive_group()
+
+    scope_group.add_argument(
+        '--only-metadata', '-m', action='store_true',
+        help='check only metadata independent of individual sources')
+
+    scope_group.add_argument(
+        '--only-index', '-i', action='store_true',
+        help='check only metadata and sources\' index entries')
+
+    scope_group.add_argument(
+        '--only-control', '-c', action='store_true',
+        help='check metadata and index entries of all sources but actual '
+             'data only for control sources')
+
+    args = ap.parse_args(argv)
+
+    global show_true_cond, show_false_cond
+    show_true_cond = args.verbose
+    show_false_cond = not args.quiet
+
+    data1 = RunDirectory(args.input1)
+    data2 = RunDirectory(args.input2)
+
+    if args.select:
+        select_method, select_strs = 'select', args.select
+    elif args.deselect:
+        select_method, select_strs = 'deselect', args.deselect
+    else:
+        select_method, select_strs = None, []
+
+    sel = [select_str.split(',') if ',' in select_str else (select_str, '*')
+           for select_str in select_strs]
+
+    if sel:
+        data1 = getattr(data1, select_method)(sel)
+        data2 = getattr(data2, select_method)(sel)
+
+    # NOTE(security): eval() runs the slice expression as arbitrary Python;
+    # only the invoking user's own command line may ever reach it.
+    if args.trains_by_id:
+        sel = eval(f'by_id[{args.trains_by_id}]')
+    elif args.trains_by_index:
+        sel = eval(f'by_index[{args.trains_by_index}]')
+    else:
+        sel = None
+
+    if sel is not None:
+        data1 = data1.select_trains(sel)
+        data2 = data2.select_trains(sel)
+
+    meta1 = data1.run_metadata()
+    meta2 = data2.run_metadata()
+
+    for meta in [meta1, meta2]:
+        for field in ('creationDate', 'updateDate', 'dataFormatVersion',
+                      'karaboFramework', 'daqLibrary', 'dataWriter'):
+            meta.pop(field, None)
+
+    cmp('Metadata excluding dates and versions', meta1 == meta2)
+    cmp('Train IDs', data1.train_ids == data2.train_ids)
+
+    # This is sometimes not equal.
+    cmp('Train timestamps',
+        np.array_equal(data1.train_timestamps(), data2.train_timestamps()))
+
+    cmp('Control source names',
+        data1.control_sources == data2.control_sources)
+    cmp('Instrument source names',
+        data1.instrument_sources == data2.instrument_sources)
+
+    # --only-metadata skips the per-source checks but must still reach the
+    # summary below; returning early here left -q with no output at all.
+    common_sources = ([] if args.only_metadata
+                      else sorted(data1.all_sources & data2.all_sources))
+
+    for source in common_sources:
+        cmp(f'{source} keys', data1[source].keys() == data2[source].keys())
+
+        sd1 = data1[source]
+        sd2 = data2[source]
+
+        counts1 = {grp: sd1.data_counts(labelled=False, index_group=grp)
+                   for grp in sd1.index_groups}
+        counts2 = {grp: sd2.data_counts(labelled=False, index_group=grp)
+                   for grp in sd2.index_groups}
+
+        for index_group in sorted(counts1.keys() & counts2.keys()):
+            index_group_str = f'/{index_group}' if index_group else ''
+            cmp(f'{source}{index_group_str} counts',
+                np.array_equal(counts1[index_group], counts2[index_group]))
+
+        if args.only_index or (sd1.is_instrument and args.only_control):
+            continue
+
+        if not sd1.is_instrument:
+            run_values1 = sd1.run_values()
+            run_values2 = sd2.run_values()
+
+            cmp(f'{source} run keys', run_values1.keys() == run_values2.keys())
+
+            for key in sorted(run_values1.keys() & run_values2.keys()):
+                value1 = run_values1[key]
+                value2 = run_values2[key]
+
+                if isinstance(value1, np.ndarray):
+                    is_equal = np.array_equal(
+                        value1, value2,
+                        equal_nan=np.issubdtype(value1.dtype, np.floating))
+                elif isinstance(value1, np.floating):
+                    is_equal = np.array_equal(value1, value2, equal_nan=True)
+                else:
+                    is_equal = run_values1[key] == run_values2[key]
+
+                cmp(f'{source}, {key} run value', is_equal)
+
+        for key in sorted(sd1.keys() & sd2.keys()):
+            cmp(f'{source}, {key} data',
+                np.array_equal(sd1[key].ndarray(), sd2[key].ndarray(),
+                               equal_nan=True))
+
+    if not args.quiet:
+        num_total_cond = num_true_cond + num_false_cond
+        print('Compared {} items, {} are equal and {} are not'.format(
+            num_total_cond, num_true_cond, num_false_cond))
+    else:
+        print(num_false_cond)