#!/usr/bin/python3
# Gets list of packages necessary for processing of a coredump.
# Uses eu-unstrip and dnf.

import argparse
import logging
import subprocess
import sys
from typing import List, Tuple, Union

import dnf
from dnf.package import Package


logger = logging.getLogger('coredump2packages')
PackageList = List[Package]


def binary_packages_from_debuginfo_package(debuginfo_package: Package, binobj_path: str,
                                           dnfbase: dnf.Base) -> PackageList:
    """
    Look up the packages that go with the given debuginfo package.

    When binobj_path names a file, the sack is searched for packages
    of the debuginfo package's architecture that contain that file,
    and only those whose epoch-version-release compares equal to the
    debuginfo package (evr_cmp() == 0) are returned.

    When binobj_path is just '-' (eu-unstrip reported no binary name)
    the owning package cannot be determined from the path. In that
    case the sack is searched for packages with the same epoch,
    version, release and architecture as the debuginfo package, and
    those whose name equals the debuginfo package's name are returned.

    The result may be empty.
    """
    matches: List[Package] = []

    if binobj_path != "-":
        logger.info("   Dnf search for %s...", binobj_path)
        providers = dnfbase.sack.query().filter(file=binobj_path,
                                                arch=debuginfo_package.arch)
        for provider in providers:
            logger.info("    - %s", str(provider))
            if provider.evr_cmp(debuginfo_package) == 0:
                logger.info("    ... NVR matches")
                matches.append(provider)
            else:
                logger.info("    ... NVR doesn't match")
        return matches

    # [exe] without binary name.
    epoch = debuginfo_package.epoch
    version = debuginfo_package.version
    release = debuginfo_package.release
    arch = debuginfo_package.arch
    logger.info("Dnf search for [exe] without binary name, packages with NVR %s:%s-%s.%s...",
                epoch, version, release, arch)
    # Collect the packages sharing the debuginfo package's EVRA and
    # name. An alternative would be to download the debuginfo RPM,
    # unpack it, and read the binary name from the
    # /usr/lib/debug/.build-id/xx/yyyyyy symlink.
    same_evra = dnfbase.sack.query().filter(epoch=epoch, version=version,
                                            release=release, arch=arch)
    for candidate in same_evra:
        logger.info("    - %s: base name '%s'", str(candidate), candidate.name)
        if candidate.name == debuginfo_package.name:
            matches.append(candidate)
    return matches


def process_unstrip_entry(build_id: str, binobj_path: str, dnfbase: dnf.Base) -> Tuple[PackageList, List[str]]:
    """
    Resolve one eu-unstrip entry (build_id + binobj_path) to packages.

    Returns a pair:

    1. Every package associated with the entry: each debuginfo package
       that ships one of the build-id paths, followed by the binary
       packages found for it.

    2. A list of strings usable to associate the coredump with a
       crashing package. If exactly one binary package was found for
       every debuginfo package, these are the string forms of those
       binary packages. Otherwise the list holds the names of the
       debuginfo packages instead.
    """
    id_head, id_tail = build_id[:2], build_id[2:]
    # Paths a package shipping this build-id is expected to contain.
    debuginfo_paths = [
        f"/usr/lib/debug/.build-id/{id_head}/{id_tail}.debug",
        f"/usr/lib/.build-id/{id_head}/{id_tail}",
    ]

    logger.info("Dnf search for debuginfo packages for build-id %s:", build_id)
    for candidate_path in debuginfo_paths:
        logger.info(" - %s", candidate_path)

    found_packages: PackageList = []
    exact_matches: List[str] = []
    fallback_names: List[str] = []

    # Some libraries lack debuginfo: either they were stripped during
    # the build, or /usr/lib/rpm/find-debuginfo.sh did not process them
    # (e.g. because of wrong permissions). The proper solution is to
    # detect such libraries and fix the packages.
    for debuginfo_package in dnfbase.sack.query().filter(file=debuginfo_paths):
        logger.info(" - %s", str(debuginfo_package))
        found_packages.append(debuginfo_package)
        binary_packages = binary_packages_from_debuginfo_package(debuginfo_package, binobj_path, dnfbase)
        fallback_names.append(debuginfo_package.name)
        if len(binary_packages) == 1:
            exact_matches.append(str(binary_packages[0]))
        found_packages.extend(binary_packages)

    # Exact matches are only trusted when every debuginfo package
    # produced one.
    all_resolved = len(exact_matches) == len(fallback_names)
    return found_packages, (exact_matches if all_resolved else fallback_names)


def process_unstrip_output(unstrip: str, dnfbase: dnf.Base) \
        -> Tuple[PackageList, List[Tuple[str, str]], List[str]]:
    """
    Parse the eu-unstrip output, and search for packages via dnf.

    Lines with fewer than three whitespace-separated fields are
    skipped, as are lines whose needed fields are missing and entries
    that name neither an absolute path nor the "[exe]" module.

    Returns a tuple containing three items:
      - a list of package objects, without repetitions (packages are
        considered the same when their string forms are equal)
      - a list of (path, build id) pairs for entries for which no
        package was found
      - a list of coredump package candidates, taken from the first
        entry that was looked up
    """
    # Packages found in dnf repositories and matching the coredump.
    package_list: PackageList = []
    # String forms of the packages already in package_list, so that
    # de-duplication does not rescan the whole list for every package.
    seen_packages = set()
    # Pairs (library/executable path, build id) not found via dnf.
    missing_buildid_list: List[Tuple[str, str]] = []
    # Coredump package candidates.
    coredump_package_list: List[str] = []

    first_entry = True
    for line in unstrip.split("\n"):
        parts = line.split()
        if len(parts) < 3:
            continue

        # A line of the command output consists of the following five
        # whitespace-separated fields:
        #   0 address+size
        #   1 buildid
        #   2 file
        #   3 debug_file
        #   4 module_name
        build_id = parts[1].split("@")[0]
        binobj_path = parts[2]
        # Malformed eu-unstrip output (e.g. for X.org cores) may lack
        # the module name field; such a line is skipped whenever that
        # field would have been needed.
        try:
            # If FILE (parts[2]) is not present on the local filesystem
            # eu-unstrip uses FILE as MODULENAME (parts[4]).
            if binobj_path in ("-", ".") and parts[4] != "[exe]":
                binobj_path = parts[4]
            if binobj_path[0] != "/" and parts[4] != "[exe]":
                continue
        except IndexError:
            continue

        entry_package_list, entry_coredump_package_list = process_unstrip_entry(build_id, binobj_path, dnfbase)
        if first_entry:
            coredump_package_list = entry_coredump_package_list
            first_entry = False

        if not entry_package_list:
            missing_buildid_list.append((binobj_path, build_id))
            continue

        for entry_package in entry_package_list:
            package_key = str(entry_package)
            if package_key not in seen_packages:
                seen_packages.add(package_key)
                package_list.append(entry_package)

    return package_list, missing_buildid_list, coredump_package_list


# The package list might contain multiple packages with the same name,
# but different version. This happens because some binary had the same
# build id over multiple package releases.
def find_duplicates(package_list: PackageList) -> Union[Tuple[None, None], Tuple[Package, Package]]:
    """
    Return the first pair of packages sharing both name and arch.

    Pairs are examined in list order: each package is compared with
    the packages that follow it. Returns (None, None) when no two
    packages share a name and an architecture.
    """
    for position, earlier in enumerate(package_list):
        identity = (earlier.name, earlier.arch)
        for later in package_list[position + 1:]:
            if (later.name, later.arch) == identity:
                return earlier, later
    return None, None


def count_removals(package_list: PackageList, base_package_name: str, epoch: str,
                   ver: str, rel: str, arch: str) -> int:
    """
    Count the packages in package_list matching the given identity.

    A package is counted when its name, epoch, version, release and
    arch attributes are all equal to base_package_name, epoch, ver,
    rel and arch respectively. The values are compared as given, so
    they must be of the same type as the package attributes.
    """
    # The package attributes are read as `version` and `release`,
    # the same attribute names every other function in this file uses.
    return sum(
        1
        for package in package_list
        if package.name == base_package_name
        and package.epoch == epoch
        and package.version == ver
        and package.release == rel
        and package.arch == arch
    )



def main() -> None:
    """
    Command-line entry point.

    Parses the arguments, sets up dnf with only the repositories
    matching --repos enabled, runs eu-unstrip on the coredump, looks
    up the packages for its output, drops duplicate package versions
    and prints the result to standard output in three sections
    separated by empty lines:

      1. the single coredump package candidate, or "-" when there is
         not exactly one
      2. the sorted list of packages
      3. "path build_id" lines for entries without a package

    Exits with status 2 when the dnf configuration has no cachedir and
    with status 1 when eu-unstrip produced no output.
    """
    arg_parser = argparse.ArgumentParser(description="Get packages for coredump processing.")
    arg_parser.add_argument("--repos", default="*", metavar="WILDCARD",
                            help="DNF repository wildcard to be enabled")
    arg_parser.add_argument("--config", help="DNF config file")
    arg_parser.add_argument("coredump", help="Coredump")
    arg_parser.add_argument("--log", metavar="FILENAME",
                            help="Append debug output to a file")
    options = arg_parser.parse_args()

    # Without --log all logging is switched off.
    if not options.log:
        logging.disable(logging.CRITICAL)
    else:
        logging.basicConfig(filename=options.log, level=logging.INFO)

    # Initialize DNF; read the given config file, or the default one
    # when --config was not supplied.
    dnfbase = dnf.Base()
    config_args = (options.config,) if options.config else ()
    dnfbase.conf.read(*config_args)
    if not dnfbase.conf.cachedir:
        sys.exit(2)
    dnfbase.read_all_repos()

    # Leave only the repositories selected by --repos enabled.
    logger.info("Closing all enabled repositories...")
    for enabled_repo in dnfbase.repos.iter_enabled():
        logger.info(" - %s", enabled_repo.name)
        enabled_repo.disable()

    logger.info("Enabling repositories matching '%s'...", options.repos)
    for wanted_repo in dnfbase.repos.get_matching(options.repos):
        logger.info(" - %s", wanted_repo.name)
        wanted_repo.enable()
        wanted_repo.skip_if_unavailable = True

    # Load repository metadata only, not the system repository.
    dnfbase.fill_sack(load_system_repo=False)

    # eu-unstrip lists the build-ids and binary object paths of the
    # coredump.
    logger.info("Running eu-unstrip...")
    unstrip = subprocess.run(
        ["eu-unstrip", f"--core={options.coredump}", "-n"],
        stdout=subprocess.PIPE, check=False, encoding="utf8",
    ).stdout
    logger.info("%s", unstrip)
    if not unstrip:
        sys.exit(1)

    package_list, missing_buildid_list, coredump_package_list = process_unstrip_output(unstrip, dnfbase)

    # Repeatedly pick a pair of same-name, same-arch packages and drop
    # one version of it until no such pair is left.
    logger.info("Checking for duplicates...")
    first, second = find_duplicates(package_list)
    while first is not None:
        assert second is not None
        first_count = count_removals(package_list, first.name, first.epoch,
                                     first.version, first.release, first.arch)
        second_count = count_removals(package_list, second.name, second.epoch,
                                      second.version, second.release, second.arch)

        logger.info(" - %s", first.name)
        if first.name == second.name:
            logger.info("\n")
        else:
            logger.info("   %s", second.name)
        for pkg, removals in ((first, first_count), (second, second_count)):
            logger.info("   - %s:%s-%s.%s (%s dependent packages)", pkg.epoch,
                        pkg.version, pkg.release, pkg.arch, removals)

        if first_count == second_count:
            # Equal counts: drop the older of the two.
            removal_candidate = second if first.evr_cmp(second) > 0 else first
            verdict = "   - decided to remove %s:%s-%s.%s because it's older"
        else:
            # Otherwise drop the one with the lower count.
            removal_candidate = second if first_count > second_count else first
            verdict = "   - decided to remove %s:%s-%s.%s because has fewer dependencies"
        logger.info(verdict, removal_candidate.epoch, removal_candidate.version,
                    removal_candidate.release, removal_candidate.arch)

        # Drop every package with the candidate's name and EVR.
        package_list[:] = [
            pkg for pkg in package_list
            if not (pkg.name == removal_candidate.name and pkg.evr_cmp(removal_candidate) == 0)
        ]
        first, second = find_duplicates(package_list)

    # Keep only the coredump package candidates that still refer to a
    # package in the list, either by full string form or by name.
    coredump_package_list = [
        candidate for candidate in coredump_package_list
        if any(str(pkg) == candidate or pkg.name == candidate for pkg in package_list)
    ]

    # Section 1: the coredump package, when it is unambiguous.
    print(coredump_package_list[0] if len(coredump_package_list) == 1 else "-")
    print()

    # Section 2: the packages found.
    for pkg in sorted(package_list):
        print(pkg)
    print()

    # Section 3: objects for which no package was found.
    for path, build_id in missing_buildid_list:
        print(f"{path} {build_id}")

# This file is meant to be executed as a script only; importing it as a
# module is rejected.
if __name__ != "__main__":
    raise NotImplementedError
main()
