#!/usr/bin/python3
import os
import re
import sys
import time
from datetime import timedelta
from pathlib import Path
from signal import SIGKILL
from subprocess import run, PIPE, STDOUT
from typing import Dict, Iterator, List, Optional, TextIO, Tuple

from retrace.retrace import (STATUS_FAIL,
                             get_active_tasks,
                             get_md5_tasks,
                             get_running_tasks,
                             run_ps,
                             RetraceTask)
from retrace.config import Config, LSOF_BIN

CONFIG = Config()


def get_process_tree(process_id: int, ps_output: List[str]) -> List[int]:
    """
    Collect a process and all of its descendants from a ps listing.

    Only lines that start with two whitespace-separated decimal numbers
    followed by whitespace are considered: the first number is read as the
    process ID and the second as its parent's process ID. Every other line
    (a header, for example) is ignored.

    The result starts with `process_id` itself, followed by its descendants
    in depth-first pre-order; siblings keep the order in which they appear
    in `ps_output`. Each process ID is reported at most once, so a
    self-parented or cyclic entry cannot cause endless traversal.
    """
    line_parser = re.compile(r"^\s*(\d+)\s+(\d+)\s", re.ASCII)

    # Parse the listing once into a parent -> children map instead of
    # rescanning every line for each process in the tree.
    children: Dict[int, List[int]] = {}
    for line in ps_output:
        match = line_parser.match(line)
        if match:
            child_id, parent_id = int(match.group(1)), int(match.group(2))
            children.setdefault(parent_id, []).append(child_id)

    result = []
    seen = set()
    pending = [process_id]
    while pending:
        current = pending.pop()
        if current in seen:
            continue
        seen.add(current)
        result.append(current)
        # Push in reverse so the first listed child is popped (visited) first.
        pending.extend(reversed(children.get(current, [])))

    return result


def kill_process_and_childs(process_id: int, ps_output: Optional[List[str]] = None) -> bool:
    """
    Send SIGKILL to a process and to every process in its tree.

    `ps_output` is the ps listing used to discover the tree; when it is
    None a fresh listing is taken with run_ps(). Every process is attempted
    even if an earlier kill fails.

    Returns True when all signals were delivered, False if any os.kill()
    call raised OSError.
    """
    listing = run_ps() if ps_output is None else ps_output

    all_killed = True
    for victim in get_process_tree(process_id, listing):
        try:
            os.kill(victim, SIGKILL)
        except OSError:
            # Keep going so the rest of the tree is still signalled.
            all_killed = False

    return all_killed


def check_open_crash_file(retrace_task: RetraceTask) -> bool:
    """
    Tell whether the task's vmcore or coredump is held open by some process.

    The vmcore is preferred when the task has one; otherwise the coredump
    inside the task's crash directory is checked. A task with neither file
    yields False. The file counts as open when lsof prints anything on its
    standard output; lsof's exit status is not inspected.
    """
    if retrace_task.has_vmcore():
        target = retrace_task.get_vmcore_path()
    elif retrace_task.has_coredump():
        target = retrace_task.get_crashdir() / retrace_task.COREDUMP_FILE
    else:
        # No crash file at all, so nothing can have it open.
        return False

    listing = run([LSOF_BIN, "+wt", target], stdout=PIPE, check=False, encoding="utf-8")

    return bool(listing.stdout)


def check_config() -> None:
    """
    Sanity-check the task retention settings before any cleanup happens.

    Writes a warning to stderr when both DeleteTaskAfter and
    ArchiveTaskAfter are enabled, naming the one that will take effect.
    Writes an error to stderr and exits with status 1 when DeleteTaskAfter
    is enabled but lower than an enabled DeleteFailedTaskAfter.
    """
    delete_after = CONFIG["DeleteTaskAfter"]
    archive_after = CONFIG["ArchiveTaskAfter"]

    if delete_after > 0 and archive_after > 0:
        # Archiving is reported as the winner unless deletion has the
        # strictly smaller threshold.
        winner = "deleting" if delete_after < archive_after else "archiving"

        sys.stderr.write("WARNING: Both DeleteTaskAfter and ArchiveTaskAfter "
                         "are enabled, however they are mutually exclusive "
                         "settings. With your current settings only %s will "
                         "take place. Please check retrace-server.conf and "
                         "disable either DeleteTaskAfter or ArchiveTaskAfter "
                         "by setting its value to 0.\n" % winner)

    failed_after = CONFIG["DeleteFailedTaskAfter"]

    if failed_after > 0 and 0 < delete_after < failed_after:
        sys.stderr.write("ERROR: DeleteTaskAfter is set to a lower value than "
                         "DeleteFailedTaskAfter. Check retrace-server.conf, "
                         "did you accidentally reverse the values for "
                         "DeleteTaskAfter and DeleteFailedTaskAfter? "
                         "Not touching any tasks.\n")
        sys.exit(1)


# Running tasks older than this many seconds are killed.
MAX_TASK_RUNTIME_SECONDS = 60 * 60  # 1 hour


def _iter_saved_tasks(log: TextIO) -> Iterator[Tuple[Path, RetraceTask]]:
    """Yield (path, task) for each SaveDir entry that loads as a RetraceTask."""
    try:
        entries = list(Path(CONFIG["SaveDir"]).iterdir())
    except OSError as ex:
        log.write("Error listing task directory: %s\n" % ex)
        return

    for filepath in entries:
        try:
            task = RetraceTask(filepath.name)
        except Exception:
            # Entries that cannot be loaded as a task are skipped without logging.
            continue

        yield filepath, task


def _kill_long_running_tasks(log: TextIO) -> None:
    """Kill running tasks (with their child processes) that exceeded the runtime limit."""
    ps_out = run_ps()
    for pid, taskid, runtime in get_running_tasks(ps_out):
        # do not kill tasks started from task manager
        if CONFIG["AllowTaskManager"]:
            try:
                if RetraceTask(taskid).get_managed():
                    continue
            except Exception:
                # The failure is only logged; the runtime check below still applies.
                log.write("RetraceTask with taskid %d does not exist\n" % taskid)

        if runtime > MAX_TASK_RUNTIME_SECONDS:
            elapsed = timedelta(seconds=runtime)
            log.write("Killing task %d running for %s\n" % (taskid, elapsed))
            kill_process_and_childs(pid, ps_out)


def _clean_orphaned_tasks(log: TextIO) -> None:
    """Clean up active tasks that no longer have a running process."""
    # Query the running tasks again (fresh process listing) so that tasks
    # killed just before this step are no longer reported as running.
    running_ids = {taskid for _, taskid, _ in get_running_tasks()}

    for taskid in get_active_tasks():
        if taskid in running_ids:
            continue

        log.write("Cleaning up orphaned task %d\n" % taskid)
        try:
            task = RetraceTask(taskid)
        except Exception:
            log.write("Unable to create RetraceTask object for task %d\n" % taskid)
            continue

        task.create_worker().clean_task()
        task.set_log("Task was killed due to running too long or taking too many resources.\n", True)


def _dedup_vmcores(log: TextIO) -> None:
    """Deduplicate tasks sharing an md5sum against the first task seen with it."""
    md5_tasks: Dict[str, RetraceTask] = {}
    total_savings = 0  # presumably bytes; reported below in MB

    for task in get_md5_tasks():
        md5_fields = str.split(task.get_md5sum())
        if not md5_fields:
            # An empty md5sum must not abort the whole cleanup run.
            log.write("Skipping task %s: md5sum is empty\n" % task.get_taskid())
            continue

        md5 = md5_fields[0]
        if md5 in md5_tasks:
            worker = task.create_worker()
            worker.begin_logging()
            total_savings += worker.dedup_vmcore(md5_tasks[md5])
            worker.end_logging()
        else:
            md5_tasks[md5] = task

    log.write("Total space savings from duplicate task hardlinking (md5sums equal, different inodes): %d MB\n"
              % (total_savings // 1024 // 1024))


def _archive_old_tasks(log: TextIO) -> None:
    """Pack each saved task that reached ArchiveTaskAfter into DropDir, then remove it."""
    for filepath, task in _iter_saved_tasks(log):
        if task.get_age() < CONFIG["ArchiveTaskAfter"]:
            continue

        log.write("Archiving task %s\n" % filepath.name)
        dropdir = Path(CONFIG["DropDir"])
        # Created lazily, only once there is something to archive.
        dropdir.mkdir(parents=True, exist_ok=True)

        targetfile = dropdir / ("%s-%s.tar.gz" % (filepath.name, time.strftime("%Y%m%d%H%M%S")))

        child = run(["tar", "czf", str(targetfile), str(task.get_savedir())],
                    stdout=PIPE, stderr=STDOUT, check=False)
        if child.returncode:
            log.write("Error: tar exited with %d: %r\n" % (child.returncode, child.stdout))
            try:
                # Best effort: do not leave a partial archive behind.
                targetfile.unlink()
            except OSError:
                pass

            # The task is kept because it was not archived successfully.
            continue

        task.create_worker().remove_task()


def _delete_old_tasks(log: TextIO, min_age: int, failed_only: bool = False) -> None:
    """Remove saved tasks at least `min_age` old, optionally only failed ones."""
    description = "old failed task" if failed_only else "old task"

    for filepath, task in _iter_saved_tasks(log):
        if task.get_age() < min_age:
            continue

        if failed_only and task.get_status() != STATUS_FAIL:
            continue

        if check_open_crash_file(task):
            log.write("Deletion of task %d skipped - crash file is opened.\n" % task.get_taskid())
            continue

        log.write("Deleting %s %s\n" % (description, filepath.name))
        task.create_worker().remove_task()


def main() -> None:
    """Run all cleanup phases in order, appending progress to cleanup.log in LogDir."""
    check_config()

    logfile = Path(CONFIG["LogDir"], "cleanup.log")

    with logfile.open("a") as log:
        log.write(time.strftime("[%Y-%m-%d %H:%M:%S] Running cleanup\n"))

        _kill_long_running_tasks(log)
        _clean_orphaned_tasks(log)
        _dedup_vmcores(log)

        if CONFIG["ArchiveTaskAfter"] > 0:
            _archive_old_tasks(log)

        if CONFIG["DeleteTaskAfter"] > 0:
            _delete_old_tasks(log, CONFIG["DeleteTaskAfter"])

        if CONFIG["DeleteFailedTaskAfter"] > 0:
            _delete_old_tasks(log, CONFIG["DeleteFailedTaskAfter"], failed_only=True)


if __name__ == "__main__":
    main()
