#!/usr/bin/python3.13 from __future__ import print_function desc = """Generate the difference of two YAML files into a new YAML file (works on pair of directories too). A new attribute 'Added' is set to True or False depending whether the entry is added or removed from the first input to the next. The tools requires PyYAML.""" import yaml # Try to use the C parser. try: from yaml import CLoader as Loader except ImportError: from yaml import Loader import optrecord import argparse from collections import defaultdict if __name__ == "__main__": parser = argparse.ArgumentParser(description=desc) parser.add_argument( "yaml_dir_or_file_1", help="An optimization record file or a directory searched for optimization " "record files that are used as the old version for the comparison", ) parser.add_argument( "yaml_dir_or_file_2", help="An optimization record file or a directory searched for optimization " "record files that are used as the new version for the comparison", ) parser.add_argument( "--jobs", "-j", default=None, type=int, help="Max job count (defaults to %(default)s, the current CPU count)", ) parser.add_argument( "--max-size", "-m", default=100000, type=int, help="Maximum number of remarks stored in an output file", ) parser.add_argument( "--no-progress-indicator", "-n", action="store_true", default=False, help="Do not display any indicator of how many YAML files were read.", ) parser.add_argument("--output", "-o", default="diff{}.opt.yaml") args = parser.parse_args() files1 = optrecord.find_opt_files(args.yaml_dir_or_file_1) files2 = optrecord.find_opt_files(args.yaml_dir_or_file_2) print_progress = not args.no_progress_indicator all_remarks1, _, _ = optrecord.gather_results(files1, args.jobs, print_progress) all_remarks2, _, _ = optrecord.gather_results(files2, args.jobs, print_progress) added = set(all_remarks2.values()) - set(all_remarks1.values()) removed = set(all_remarks1.values()) - set(all_remarks2.values()) for r in added: r.Added = True for r in removed: r.Added = False result = list(added | removed) for r in result: r.recover_yaml_structure() for i in range(0, len(result), args.max_size): with open(args.output.format(i / args.max_size), "w") as stream: yaml.dump_all(result[i : i + args.max_size], stream)