from fabric import Connection
from invoke.exceptions import UnexpectedExit
from invoke import Responder
import os
from typing import Dict, Union
from canesm.exceptions import wrap_canesm_remotefail, RemoteError
import canesm
from canesm.util import (RemoteFile, RemoteDBConn, ProcessBash, ProcessCPPDef, ProcessString,
                         year_from_time, month_from_time, previous_month, add_time, is_null)
from typing import Type, List
import logging
import pandas as pd


class CanESMsetup:
    """
    Class that handles the setup and running of one instance of the CanESM model on a remote machine.

    Parameters
    ----------
    ver :
        git hash or git branch that will be used to pull the code
    config :
        `AMIP` or `ESM`
    runid:
        name of the run
    user :
        User name on the machine where the job will be ran
    run_directory :
        Directory name where the code will be stored
    machine :
        Name of the machine where the job is run, either `hare` or `brooks`

    Examples
    --------
    >>> esm = CanESMsetup(ver='develop-canesm', config='AMIP', runid='testrun',
    ...                   user='raa000', run_directory='test_folder', machine='hare')
    >>> esm.start_time = 2000
    >>> esm.stop_time = 2100
    >>> esm.canesm_cfg['gpxsave'] = 'on'
    >>> esm.restart_files['runid_in'] = 'vsa_v4_01'
    >>> esm.restart_files['date_in'] = '1989_m12'
    >>> esm.tapeload = True
    >>> esm.phys_parm['pp_rdm_num_pert'] = 10
    >>> esm.setup_job()
    >>> esm.submit_job()
    """

    def __init__(self, ver: str = '', config: str = '', runid: str = '',
                 user: str = '', run_directory: str = '', machine: str = ''):

        # machine and user specifications
        self.machine = machine  # 'hare', 'brooks'
        self.run_directory = run_directory  # directory where the code is installed
        self.user = user  # user name used for ssh connections
        self.gateway_conn = 'sci-eccc-in.science.gc.ca'

        # canesm run setup
        self.config = config
        self.ver = ver
        self.runid = runid
        self.start_time = 2003
        self.stop_time = 2008
        self.tapeload = False
        self.setup_flags = None

        # dictionaries containing file-specific setup changes
        self.canesm_cfg = {}
        self.make_job = {}
        self.phys_parm = {}
        self.restart_files = {}
        self.basefile = {}
        self.inline_diag_nl = {}

        # mapping of cpp definition file name -> dictionary of settings for that file
        self.cpp_defs = {}

        # when set, the corresponding option flag is passed to the restart script
        self.phys_parm_from_local = False
        self.inline_from_local = False

        self.is_setup = False

        # automatic answers to the interactive prompts printed by the remote scripts
        self.responders = [Responder(pattern=r'\*\*\* Use .* option to use .* in the local run directory. \*\*\*',
                                     response='\n'),
                           Responder(pattern=r'WARNING: directory .* exists!',
                                     response='y\n'),
                           Responder(pattern=r'WARNING: year_rtdiag_start=.* != start_year=.* or/and'
                                             r' month_rtdiag_start=.* != start_month=.* !!!',
                                     response='y\n')]

        # lazily filled caches of remote environment variables (see the matching properties)
        self._runpath = None
        self._ccrnsrc = None
        self._wrk_dir = None

        self.logger = logging.getLogger('canesm-ensemble')

    @property
    def job_str(self) -> str:
        """
        name of the job string file, built from the runid, start/stop dates and machine
        """
        return f"{self.runid}_{self.start_year}m{self.start_month:02d}_" \
               f"{self.stop_year}m{self.stop_month:02d}_{self.machine}_job"

    @property
    def start_year(self) -> int:
        return year_from_time(self.start_time)

    @property
    def start_month(self) -> int:
        return month_from_time(self.start_time, default_month=1)

    @property
    def stop_year(self) -> int:
        return year_from_time(self.stop_time)

    @property
    def stop_month(self) -> int:
        return month_from_time(self.stop_time, default_month=12)

    @property
    def runpath(self) -> str:
        """
        value of $RUNPATH in the job environment, queried once and then cached
        """
        if self._runpath is None:
            result = self.run_command('echo $RUNPATH')
            self._runpath = result.stdout.strip()
        return self._runpath

    @property
    def ccrnsrc(self) -> str:
        """
        value of $CCRNSRC in the job environment, queried once and then cached
        """
        if self._ccrnsrc is None:
            result = self.run_command('echo $CCRNSRC')
            self._ccrnsrc = result.stdout.strip()
        return self._ccrnsrc

    @property
    def wrk_dir(self) -> str:
        """
        value of $WRK_DIR in the job environment, queried once and then cached
        """
        if self._wrk_dir is None:
            result = self.run_command('echo $WRK_DIR')
            self._wrk_dir = result.stdout.strip()
        return self._wrk_dir

    @property
    def events(self) -> pd.DataFrame:
        """
        return the history of events
        """

        # the log database is looked up one level above $CCRNSRC
        filename = os.path.join(self.ccrnsrc, '..', self.runid + '-log.db')
        with RemoteDBConn(filename, machine=self.machine, user=self.user) as db:
            data = pd.read_sql_query('SELECT * FROM events', db)

        return data

    @property
    def queue_files(self) -> List[str]:
        """
        return a list of files from the .queue directory associated with this run
        """
        listing = self.run_command('ls ~/.queue', setup_env=False, run_directory='~').stdout.strip().split('\n')

        # a file belongs to this run if the runid appears anywhere in its name
        return [file for file in listing if self.runid in file]

    def load_file(self, file, directory: str = ''):
        """
        return the contents of a file from the remote machine

        Parameters
        ----------
        file :
            Name of the file to read
        directory :
            Name of the directory where file is located. Defaults to an empty string, in which case
            `file` is used as given.

        Returns
        -------
            The file contents, or None if the file could not be found.
        """
        # NOTE(review): an earlier version of this docstring said `directory` defaults to ~/.queue, but the
        # default is ''. Confirm which one is intended before relying on the default with `queue_files`.

        file = os.path.join(directory, file)
        try:
            with RemoteFile(file, machine=self.machine, user=self.user) as f:
                contents = f.read()
        except FileNotFoundError:
            contents = None

        return contents

    @wrap_canesm_remotefail
    def _process_file(self, file: str, settings: Dict[str, Union[str, int, float]],
                      src_dir: str = None, processor: Type[ProcessString] = ProcessBash):
        """
        Update the file with the new settings. Options specified by the dictionary key will be set with the value.

        Parameters
        ----------
        file :
            name of the file to be processed
        settings :
            A dictionary of options that will changed in the file. If empty, the file is not touched.
        src_dir :
            The directory of the file. If not provided it is assumed to be `self.run_directory`
        processor:
            The string processor used to replace parameter values
        """
        # nothing to change, avoid the remote round trip
        if not settings:
            return

        if src_dir is None:
            src_dir = self.run_directory

        remote_path = os.path.join(src_dir, file)

        # make local copy of the file, edit it, and put it back on remote
        with RemoteFile(remote_path, self.machine, self.user, self.gateway_conn, mode='r') as f:
            file_contents = f.read()
        with RemoteFile(remote_path, self.machine, self.user, self.gateway_conn, mode='w') as f:
            f.write(processor(file_contents).process(settings))

    def _clone_code(self, srclnk: str = None):
        """
        Use the setup-canesm script on the remote machine to download the code from the gitlab repository

        Parameters
        ----------
        srclnk :
            if provided, passed to setup-canesm as the `srclnk` option

        Raises
        ------
        ValueError
            If the version, config or runid has not been set
        """
        if self.ver == '' or self.config == '' or self.runid == '':
            raise ValueError('version, config and runid must be specified before running')

        setup_str = self.get_setup_canesm_exec()
        clone_str = setup_str + ' ver=' + self.ver + ' config=' + self.config + ' runid=' + self.runid
        clone_str = self._add_flags(clone_str)
        if srclnk is not None:
            clone_str += ' srclnk=' + srclnk

        try:
            self.run_command(clone_str, setup_env=False)
        except RemoteError as e:
            # strict checking error can fail here on older versions of CanESM5 if the linked code has been changed
            if '*** STRICT CHECKING ERROR ***' not in str(e):
                raise

    def _setup_basefile(self):
        """
        Apply the `basefile` settings to the basefile of this run
        """
        self._process_file('basefile_' + self.runid, self.basefile)

    def _setup_canesm_cfg(self, update_rtdiag: bool = True):
        """
        Write the start/stop times and the `canesm_cfg` settings to canesm.cfg

        Parameters
        ----------
        update_rtdiag :
            if True `start_rtdiag` is also set to the start time of the run
        """
        self.canesm_cfg['start_time'] = f'{self.start_year}:{self.start_month}'
        self.canesm_cfg['stop_time'] = f'{self.stop_year}:{self.stop_month}'
        if update_rtdiag:
            self.canesm_cfg['start_rtdiag'] = f'{self.start_year}:{self.start_month}'
        self._process_file('canesm.cfg', self.canesm_cfg)

    def _setup_phys_parm(self):
        """
        Apply the `phys_parm` settings to the PHYS_PARM file of this run
        """
        self._process_file('PHYS_PARM_' + self.runid, self.phys_parm)

    def _setup_inline_diag(self):
        """
        Apply the `inline_diag_nl` settings to the INLINE_DIAG_NL file of this run
        """
        self._process_file('INLINE_DIAG_NL_' + self.runid, self.inline_diag_nl)

    def _sync_runpath_file(self, filename, settings):
        """
        Apply `settings` to every file in $RUNPATH whose name contains `filename` (case insensitive)

        Parameters
        ----------
        filename :
            name fragment used to select the files in $RUNPATH
        settings :
            A dictionary of options that will be changed in the matching files
        """
        if not settings:
            return

        src_dir = self.runpath
        result = self.run_command('ls $RUNPATH')
        files = [f for f in result.stdout.split('\n') if filename.lower() in f.lower()]
        for file in files:
            self.logger.info(self.runid + ': updating ' + filename + ' in $RUNPATH/' + file)
            self.run_command('chmod u+w $RUNPATH/' + file)
            try:
                self._process_file(file, settings, src_dir=src_dir)
            finally:
                # restore the write protection even if the edit failed
                self.run_command('chmod u-w $RUNPATH/' + file)

    def _setup_cpp_defs(self):
        """
        Process the compiler definitions.
        """
        if not self.cpp_defs:
            return

        src_dir = os.path.join(self.run_directory, 'cppdir_' + self.runid)
        for file, settings in self.cpp_defs.items():
            self._process_file(file, settings, src_dir=src_dir, processor=ProcessCPPDef)

    def _setup_restart_files(self):
        """
        Apply the `restart_files` settings to the tapeload or save_restart_files script, depending on `tapeload`
        """
        if is_null(self.tapeload):
            return

        if self.tapeload:
            # set the output filename to the month preceeding the start of the run
            self.restart_files['date_out'] = previous_month(year=self.start_year, month=self.start_month)
            self._process_file('tapeload_rs_' + self.runid, self.restart_files)
        else:
            self._process_file('save_restart_files_' + self.runid, self.restart_files)

    def _save_restart_files(self):
        """
        Move restart files from disc or tape to $RUNPATH
        """
        if is_null(self.tapeload):
            return

        self.logger.info(self.runid + ': saving restarts...')
        option_str = ''
        if self.inline_from_local:
            option_str += ' -d'
        if self.phys_parm_from_local:
            option_str += ' -p'

        script = './tapeload_rs_' if self.tapeload else './save_restart_files_'
        self.run_command(script + self.runid + ' ' + self.job_str + option_str)

    def _make_job_string(self):
        """
        Call the script that makes the job string
        """
        self.run_command('./make_job_' + self.runid)

    def _add_flags(self, clone_str):
        """
        Add additional parameters to the setup-canesm script call

        Parameters
        ----------
        clone_str :
            setup-canesm string that the flags will be added to
        """
        if self.setup_flags is not None:
            clone_str += ' ' + self.setup_flags
        return clone_str

    @wrap_canesm_remotefail
    def _setup_from_base(self, base_directory: str):
        """
        Use code from the `base_directory` instead of cloning from gitlab.

        Parameters
        ----------
        base_directory :
            directory where the base_job is ran from

        Raises
        ------
        FileNotFoundError
            If the `CanESM_source_link` of the base job could not be listed
        """

        # get the source link from the base job
        with Connection(self.machine, user=self.user, gateway=Connection(self.gateway_conn, user=self.user)) as c:
            try:
                result = c.run('ls -l ' + base_directory + '/CanESM_source_link')
                srclnk = os.path.dirname(result.stdout.split(' ')[-1].strip())  # get the link and strip 'CanESM'
            except UnexpectedExit as err:
                # keep the remote failure attached as the cause for easier debugging
                raise FileNotFoundError('base_job could not be found') from err

        self._clone_code(srclnk=srclnk)

    def _compile(self):
        """
        Compile the code
        """
        self.logger.info(self.runid + ': compiling...')
        self.run_command('./compile_' + self.runid + ' ' + self.job_str)

    def _compile_shared(self, base_directory: str, executable_directory: str = None):
        """
        Reuse the base directory executables instead of recompiling for each run

        Parameters
        ----------
        base_directory :
            directory of the base job, its $RUNPATH is searched for executables
        executable_directory :
            if provided, executables are taken from this directory instead of the base job $RUNPATH
        """
        self.logger.info(self.runid + ': copying executables instead of compiling...')

        if executable_directory is None:
            base_runpath = self.run_command(f'cd {base_directory} && . env_setup_file && echo $RUNPATH',
                                            setup_env=False, run_directory='~').stdout.strip()
        else:
            base_runpath = executable_directory
        base_files = self.run_command(f'ls {base_runpath}',
                                      setup_env=False, run_directory='~').stdout.strip().split('\n')

        date = previous_month(year=self.start_year, month=self.start_month)
        for file in base_files:
            if '_ab.' in file:
                self._copy_executable(os.path.join(base_runpath, file), f'mc_{self.runid}_{date}_ab')
            if '_cpl.exe' in file:
                self._copy_executable(os.path.join(base_runpath, file), f'mc_{self.runid}_{date}_cpl.exe')
            if '_nemo.exe' in file:
                self._copy_executable(os.path.join(base_runpath, file), f'mc_{self.runid}_nemo.exe')

    def _copy_executable(self, src: str, dest_file: str):
        """
        Copy `src` into $RUNPATH as `dest_file`, run `save` on it and remove the copy
        """
        self.run_command(f'cp {src} {os.path.join(self.runpath, dest_file)}', run_directory='~', setup_env=False)
        self.run_command(f'cd $RUNPATH && save {dest_file} {dest_file}')
        self.run_command(f'cd $RUNPATH && rm {dest_file}')

    def _setup_readme(self):
        """
        Update the README.md file for the run with the setup parameters
        """

        def as_lines(settings):
            # one 'key:value' line per setting
            return "".join([f'{key}:{val}\n' for key, val in settings.items()])

        readme = (
            f'# CanESM Run: {self.runid}\n'
            f'## Description\n'
            f'This run was generated using the setup-ensemble script\n\n'
            f'start time: {self.start_time}\n'
            f'stop time: {self.stop_time}\n\n'
            f'### Setup code information\n'
            f'code repository: https://gitlab.science.gc.ca/CanESM/canesm-ensemble\n'
            f'code version: {canesm.__version__}\n\n'
            f'### Restart information\n'
            + as_lines(self.restart_files)
            + f'\n### PHYS_PARM changes \n'
            + as_lines(self.phys_parm)
            + f'\n### canesm.cfg changes \n'
            + as_lines(self.canesm_cfg)
            + f'\n### basefile changes \n'
            + as_lines(self.basefile)
            + f'\n### inline_diag_nl changes \n'
            + as_lines(self.inline_diag_nl)
            + f'\n### cpp definition changes \n'
            + "".join([f'{nkey}\t{key}:{val}\n' for nkey in self.cpp_defs
                       for key, val in self.cpp_defs[nkey].items()])
            + f'\n\n## List of Changes/Interventions\n'
        )

        file = 'README.md'
        with RemoteFile(os.path.join(self.run_directory, file),
                        self.machine, self.user, self.gateway_conn, mode='w') as f:
            f.write(readme)

    @wrap_canesm_remotefail
    def run_command(self, command: str, setup_env: bool = True, run_directory: str = None, hide: bool = False):
        """
        runs a command on the remote machine from the :py:attr:`run_directory`

        Parameters
        ----------
        command :
            command that will be ran on the remote machine
        setup_env :
            if True commands are run inside the job environment
        run_directory :
            location to run the command from. By default commands are ran from the `run_directory`
        hide :
            if True output from remote command is suppressed

        Returns
        -------
            The result object returned by the connection's `run` call
        """
        if run_directory is None:
            run_directory = self.run_directory

        # source the environment file found in the directory the command is run from
        full_command = '. env_setup_file && ' + command if setup_env else command

        with Connection(self.machine, user=self.user, gateway=Connection(self.gateway_conn, user=self.user)) as c:
            with c.cd(run_directory):
                result = c.run(full_command, watchers=self.responders, hide=hide)
        return result

    def get_setup_canesm_exec(self):
        """
        Return full path to the setup-canesm executable.
        """
        # TODO: first check the user account for exec, then fall back to canesm_bin_latest
        return '/home/scrd101/canesm_bin_latest/setup-canesm'

    @wrap_canesm_remotefail
    def setup_dir_structure(self):
        """
        setup the directory structure for the job on the remote machine
        """
        with Connection(self.machine, user=self.user, gateway=Connection(self.gateway_conn, user=self.user)) as c:
            try:
                # probe for the directory, only create it when the probe fails
                result = c.run('cd ' + self.run_directory)
            except UnexpectedExit:
                result = c.run('mkdir -p ' + self.run_directory)
            self.logger.info(result.stdout)

    def setup_job(self, base_directory: str = None, share_executables: bool = False, executable_directory: str = None):
        """
        This method performs five main tasks
           - clone the code into the run directory (or copy from a basejob)
           - setup the configuration files (basefile, canesm.cfg, make_job, PHYS_PARM and restart files)
           - make the job string
           - compile the code
           - save restart files

        This should be called after all of the settings are configured.

        Parameters
        ----------
        base_directory : optional
            Optional directory of the base job that the code will be shared from. If not provided the code will
            be cloned from gitlab.
        share_executables : optional
            If set, executables will be copied from the base_directory run.
        executable_directory: optional
            If set the executables will be pulled from this directory instead of base_directory run
        """

        # clone the code
        self.logger.info(self.runid + ': setting up ' + self.runid + '...')
        self.setup_dir_structure()
        if base_directory is None:
            self._clone_code()
        else:
            self._setup_from_base(base_directory)

        # save the readme file
        self._setup_readme()

        # setup config files
        self._setup_basefile()
        self._setup_canesm_cfg(update_rtdiag=False)
        self._setup_phys_parm()
        self._setup_inline_diag()
        self._setup_restart_files()
        self._setup_cpp_defs()

        # make the job
        self._make_job_string()

        # compile the code, or reuse the executables of the base job when requested and possible
        if share_executables and base_directory is not None:
            self._compile_shared(base_directory, executable_directory=executable_directory)
        else:
            if share_executables:
                self.logger.warning('base_directory is needed to share executables, compiling instead')
            self._compile()

        # save restart files to backend
        self._save_restart_files()

        # ensure PHYS_PARM options have been added to the RUNPATH version of the file.
        if not self.phys_parm_from_local:
            self._sync_runpath_file('PHYS_PARM', self.phys_parm)
        if not self.inline_from_local:
            self._sync_runpath_file('INLINE_DIAG_NL', self.inline_diag_nl)

        self.is_setup = True
        self.logger.info(self.runid + ': setup succesful for ' + self.runid)

    def delete_job(self):
        """
        Delete all the files on the remote machine related to this job. This includes the working directory,
        source code and restart files.

        Remote failures are logged at debug level and do not stop the remaining deletions.

        Raises
        ------
        ValueError
            If the runid is empty, or if the runid is not part of a path that would be deleted
        """
        # an empty runid is contained in every string, which would defeat the checks guarding `rm -rf` below
        if not self.runid:
            raise ValueError('runid must be set before deleting a job')

        try:
            runpath = os.path.split(self.runpath)[0]  # drop /data
            if self.runid not in runpath:  # make sure we know what we are deleting
                raise ValueError(self.runid + ' was not found in ' + runpath + ', not deleting this path')

            self.run_command('chmod -R u+w ' + runpath)
            self.run_command('rm -rf ' + runpath)
        except RemoteError as e:
            self.logger.debug(self.runid + ': ' + str(e))

        try:
            ccrnsrc = os.path.split(self.ccrnsrc)[0]  # drop /code
            if self.runid not in ccrnsrc:  # make sure we know what we are deleting
                raise ValueError(self.runid + ' was not found in ' + ccrnsrc + ', not deleting this path')

            self.run_command('rm -rf ' + ccrnsrc)
        except RemoteError as e:
            self.logger.debug(self.runid + ': ' + str(e))

        try:
            wrk_dir = self.wrk_dir
            if self.runid not in wrk_dir:  # make sure we know what we are deleting
                raise ValueError(self.runid + ' was not found in ' + wrk_dir + ', not deleting this path')

            self.run_command('rm -rf ' + wrk_dir, setup_env=False)
        except RemoteError as e:
            self.logger.debug(self.runid + ': ' + str(e))

    def extend_run(self, years: Union[int, str]):
        """
        Extend the job to a longer simulation time

        Parameters
        ----------
        years :
            Extend the run by 'YYYY_mMM' time. If an integer is provided the run will be extended by this number of
            years. For example, to extend the run by 2 years and 6 months use format years='2_m06'.
        """

        # current end of the run in the zero padded 'YYYY_mMM' form
        current_stop = f'{self.stop_year}_m{self.stop_month:02d}'

        # the extension starts the month after the current stop time
        self.start_time = add_time(current_stop, '0000_m01')
        self.stop_time = add_time(current_stop, years)
        self._setup_canesm_cfg(update_rtdiag=False)
        self._make_job_string()

    def submit_job(self):
        """
        submits the job to backend.

        Raises
        ------
        ValueError
            If the model has not been setup
        """
        if not self.is_setup:
            raise ValueError('the job must be setup before submitting')

        self.run_command('rsub ' + self.machine + ' ' + self.job_str)