Source code for mpas_analysis.main

#!/usr/bin/env python
# This software is open source software available under the BSD-3 license.
#
# Copyright (c) 2022 Triad National Security, LLC. All rights reserved.
# Copyright (c) 2022 Lawrence Livermore National Security, LLC. All rights
# reserved.
# Copyright (c) 2022 UT-Battelle, LLC. All rights reserved.
#
# Additional copyright and license information can be found in the LICENSE file
# distributed with this code, or at
# https://raw.githubusercontent.com/MPAS-Dev/MPAS-Analysis/master/LICENSE

"""
Runs MPAS-Analysis via a configuration file (e.g. `analysis.cfg`)
specifying analysis options.
"""
# Authors
# -------
# Xylar Asay-Davis, Phillip J. Wolfram, Milena Veneziani

import mpas_analysis

import argparse
import traceback
import sys
import shutil
import os
from collections import OrderedDict
import progressbar
import logging
import xarray
import time
from importlib.resources import contents

from mache import discover_machine, MachineInfo

from mpas_tools.config import MpasConfigParser

from mpas_analysis.shared.analysis_task import AnalysisFormatter

from mpas_analysis.shared.io.utility import build_config_full_path, \
    make_directories, copyfile

from mpas_analysis.shared.html import generate_html

from mpas_analysis.shared import AnalysisTask
from mpas_analysis.shared.analysis_task import \
    update_time_bounds_from_file_names

from mpas_analysis.shared.plot.colormap import register_custom_colormaps, \
    _plot_color_gradients

from mpas_analysis import ocean
from mpas_analysis import sea_ice
from mpas_analysis.shared.climatology import MpasClimatologyTask, \
    RefYearMpasClimatologyTask
from mpas_analysis.shared.time_series import MpasTimeSeriesTask

from mpas_analysis.shared.regions import ComputeRegionMasks


def update_time_bounds_in_config(config):
    """
    Refresh the time bounds stored in the config for every component and
    every time-dependent section.

    ``update_time_bounds_from_file_names`` is called once for each
    combination of component (``ocean``, ``seaIce``) and section
    (``climatology``, ``timeSeries``, ``index``), with all sections of one
    component handled before moving on to the next component.

    Parameters
    ----------
    config : mpas_tools.config.MpasConfigParser
        contains config options

    """
    components = ('ocean', 'seaIce')
    sections = ('climatology', 'timeSeries', 'index')

    # Every component is visited so that, if one component has less output
    # than the others, the more constrained time bounds should win out.
    combinations = [(component, section)
                    for component in components
                    for section in sections]
    for component, section in combinations:
        update_time_bounds_from_file_names(config, section, component)


def build_analysis_list(config, controlConfig):
    """
    Build a list of analysis tasks. New tasks should be added here, following
    the approach used for existing analysis tasks.

    The shared climatology, time-series and region-mask tasks are constructed
    first and then handed to the individual ocean, sea-ice and iceberg
    analysis tasks.  Before returning, the list is passed through
    ``check_for_duplicate_names``.

    Parameters
    ----------
    config : mpas_tools.config.MpasConfigParser
        contains config options

    controlConfig : mpas_tools.config.MpasConfigParser or None
        contains config options for a control run, or ``None`` if no config
        file for a control run was specified

    Returns
    -------
    analyses : list of ``AnalysisTask`` objects
        A list of all analysis tasks

    Raises
    ------
    ValueError
        From ``check_for_duplicate_names`` if the task names are not unique
    """
    # Authors
    # -------
    # Xylar Asay-Davis

    analyses = []

    # Ocean Analyses

    # one shared climatology task per operator, keyed by operator name
    oceanClimatologyTasks = {
        op: MpasClimatologyTask(config=config, componentName='ocean', op=op)
        for op in ['avg', 'min', 'max']}
    oceanTimeSeriesTask = MpasTimeSeriesTask(config=config,
                                             componentName='ocean')
    oceanIndexTask = MpasTimeSeriesTask(config=config,
                                        componentName='ocean',
                                        section='index')

    oceanRefYearClimatologyTask = RefYearMpasClimatologyTask(
        config=config, componentName='ocean')

    # NOTE(review): the keyword really is spelled ``conponentName`` here.
    # Presumably it matches the parameter name in ``ComputeRegionMasks`` --
    # confirm against that class before "correcting" the spelling.
    oceanRegionMasksTask = ComputeRegionMasks(config=config,
                                              conponentName='ocean')

    # the shared climatology tasks are themselves part of the returned list
    analyses.extend(oceanClimatologyTasks.values())
    analyses.append(oceanRefYearClimatologyTask)

    analyses.append(ocean.ClimatologyMapMLD(config,
                                            oceanClimatologyTasks['avg'],
                                            controlConfig))

    # this task receives the whole dictionary of climatology tasks rather
    # than only the 'avg' one
    analyses.append(ocean.ClimatologyMapMLDMinMax(config,
                                                  oceanClimatologyTasks,
                                                  controlConfig))

    analyses.append(ocean.ClimatologyMapSST(config,
                                            oceanClimatologyTasks['avg'],
                                            controlConfig))
    analyses.append(ocean.ClimatologyMapSSS(config,
                                            oceanClimatologyTasks['avg'],
                                            controlConfig))
    analyses.append(ocean.ClimatologyMapSSH(config,
                                            oceanClimatologyTasks['avg'],
                                            controlConfig))
    analyses.append(ocean.ClimatologyMapEKE(config,
                                            oceanClimatologyTasks['avg'],
                                            controlConfig))
    analyses.append(ocean.ClimatologyMapBSF(config,
                                            oceanClimatologyTasks['avg'],
                                            controlConfig))
    analyses.append(ocean.ClimatologyMapOHCAnomaly(
        config, oceanClimatologyTasks['avg'], oceanRefYearClimatologyTask,
        controlConfig))

    analyses.append(ocean.ClimatologyMapSose(
        config, oceanClimatologyTasks['avg'], controlConfig))
    analyses.append(ocean.ClimatologyMapWoa(
        config, oceanClimatologyTasks['avg'], controlConfig))
    analyses.append(ocean.ClimatologyMapBGC(config,
                                            oceanClimatologyTasks['avg'],
                                            controlConfig))

    analyses.append(ocean.ClimatologyMapArgoTemperature(
        config, oceanClimatologyTasks['avg'], controlConfig))
    analyses.append(ocean.ClimatologyMapArgoSalinity(
        config, oceanClimatologyTasks['avg'], controlConfig))

    analyses.append(ocean.ClimatologyMapSchmidtko(
        config, oceanClimatologyTasks['avg'], controlConfig))

    analyses.append(ocean.ClimatologyMapAntarcticMelt(
        config, oceanClimatologyTasks['avg'], oceanRegionMasksTask,
        controlConfig))

    analyses.append(ocean.RegionalTSDiagrams(
        config, oceanClimatologyTasks['avg'], oceanRegionMasksTask,
        controlConfig))

    analyses.append(ocean.TimeSeriesAntarcticMelt(config, oceanTimeSeriesTask,
                                                  oceanRegionMasksTask,
                                                  controlConfig))

    analyses.append(ocean.TimeSeriesOceanRegions(config, oceanRegionMasksTask,
                                                 controlConfig))

    analyses.append(ocean.TimeSeriesTemperatureAnomaly(config,
                                                       oceanTimeSeriesTask))
    analyses.append(ocean.TimeSeriesSalinityAnomaly(config,
                                                    oceanTimeSeriesTask))
    analyses.append(ocean.TimeSeriesOHCAnomaly(config,
                                               oceanTimeSeriesTask,
                                               controlConfig))
    analyses.append(ocean.TimeSeriesSSHAnomaly(config,
                                               oceanTimeSeriesTask,
                                               controlConfig))
    analyses.append(ocean.TimeSeriesSST(config, oceanTimeSeriesTask,
                                        controlConfig))
    analyses.append(ocean.TimeSeriesTransport(config, controlConfig))

    analyses.append(ocean.OceanHistogram(config, oceanClimatologyTasks['avg'],
                                         oceanRegionMasksTask,
                                         controlConfig))
    analyses.append(ocean.MeridionalHeatTransport(
        config, oceanClimatologyTasks['avg'], controlConfig))

    analyses.append(ocean.StreamfunctionMOC(config,
                                            oceanClimatologyTasks['avg'],
                                            controlConfig))
    analyses.append(ocean.IndexNino34(config, oceanIndexTask, controlConfig))

    analyses.append(ocean.WoceTransects(config, oceanClimatologyTasks['avg'],
                                        controlConfig))

    analyses.append(ocean.SoseTransects(config, oceanClimatologyTasks['avg'],
                                        controlConfig))

    analyses.append(ocean.GeojsonTransects(config,
                                           oceanClimatologyTasks['avg'],
                                           controlConfig))

    # kept in a variable because the Hovmoller task below takes it as an
    # argument
    oceanRegionalProfiles = ocean.OceanRegionalProfiles(
        config, oceanRegionMasksTask, controlConfig)
    analyses.append(oceanRegionalProfiles)

    analyses.append(ocean.HovmollerOceanRegions(
        config, oceanRegionMasksTask, oceanRegionalProfiles, controlConfig))

    # Sea Ice Analyses
    seaIceClimatologyTask = MpasClimatologyTask(config=config,
                                                componentName='seaIce')
    seaIceTimeSeriesTask = MpasTimeSeriesTask(config=config,
                                              componentName='seaIce')

    analyses.append(seaIceClimatologyTask)
    analyses.append(sea_ice.ClimatologyMapSeaIceConc(
        config=config, mpasClimatologyTask=seaIceClimatologyTask,
        hemisphere='NH', controlConfig=controlConfig))
    analyses.append(sea_ice.ClimatologyMapSeaIceThick(
        config=config, mpasClimatologyTask=seaIceClimatologyTask,
        hemisphere='NH', controlConfig=controlConfig))
    analyses.append(sea_ice.ClimatologyMapSeaIceConc(
        config=config, mpasClimatologyTask=seaIceClimatologyTask,
        hemisphere='SH', controlConfig=controlConfig))
    analyses.append(sea_ice.ClimatologyMapSeaIceThick(
        config=config, mpasClimatologyTask=seaIceClimatologyTask,
        hemisphere='SH', controlConfig=controlConfig))
    analyses.append(seaIceTimeSeriesTask)
    # note: the production and melting tasks are called with snake_case
    # keyword names, unlike the camelCase keywords used above
    analyses.append(sea_ice.ClimatologyMapSeaIceProduction(
        config=config, mpas_climatology_task=seaIceClimatologyTask,
        hemisphere='NH', control_config=controlConfig))
    analyses.append(sea_ice.ClimatologyMapSeaIceProduction(
        config=config, mpas_climatology_task=seaIceClimatologyTask,
        hemisphere='SH', control_config=controlConfig))
    analyses.append(sea_ice.ClimatologyMapSeaIceMelting(
        config=config, mpas_climatology_task=seaIceClimatologyTask,
        hemisphere='NH', control_config=controlConfig))
    analyses.append(sea_ice.ClimatologyMapSeaIceMelting(
        config=config, mpas_climatology_task=seaIceClimatologyTask,
        hemisphere='SH', control_config=controlConfig))

    analyses.append(sea_ice.TimeSeriesSeaIce(config, seaIceTimeSeriesTask,
                                             controlConfig))

    # Iceberg Analyses
    analyses.append(sea_ice.ClimatologyMapIcebergConc(
        config=config, mpasClimatologyTask=seaIceClimatologyTask,
        hemisphere='SH', controlConfig=controlConfig))

    check_for_duplicate_names(analyses)

    return analyses


def check_for_duplicate_names(analyses):
    """
    Check for duplicate taskName and subtaskName in the list of analysis tasks
    and their subtasks

    Three kinds of problems are collected and reported together:

    * a main task whose ``taskName`` has already been seen
    * a subtask whose ``taskName`` differs from that of its parent task
    * a subtask whose ``(taskName, subtaskName)`` pair has already been seen

    Only the direct ``subtasks`` of each task are inspected.

    Parameters
    ----------
    analyses : list of mpas_analysis.shared.AnalysisTask
        A list of all analysis tasks

    Raises
    ------
    ValueError
        If any of the checks above failed; the message lists every failure
    """
    # (taskName, subtaskName) pairs encountered so far
    all_task_names = set()
    errors = []
    for analysis in analyses:
        mainTaskName = analysis.taskName
        # entries of ``analyses`` are expected to be main tasks
        assert analysis.subtaskName is None
        fullName = (mainTaskName, None)
        if fullName in all_task_names:
            errors.append(
                f'A task named {mainTaskName} has been added more than once')
        all_task_names.add(fullName)
        for subtask in analysis.subtasks:
            taskName = subtask.taskName
            subtaskName = subtask.subtaskName
            if taskName != mainTaskName:
                errors.append(
                    f'A subtask named {taskName}: {subtaskName} has a '
                    f'different task name than its parent task: \n'
                    f'  {mainTaskName}')
            # the uniqueness check applies to every subtask, not only to
            # those whose task name does not match the parent
            fullName = (taskName, subtaskName)
            if fullName in all_task_names:
                errors.append(
                    f'A subtask named {taskName}: {subtaskName} has been '
                    f'added more than once')
            all_task_names.add(fullName)

    if errors:
        all_errors = '\n  '.join(errors)
        raise ValueError(f'Analysis tasks failed these checks:\n'
                         f'  {all_errors}')


def determine_analyses_to_generate(analyses, verbose):
    """
    Build a list of analysis tasks to run based on the 'generate' config
    option (or command-line flag) and prerequisites and subtasks of each
    requested task.  Each task's ``setup_and_check`` method is called in the
    process.

    The work for each task is delegated to ``add_task_and_subtasks``; the
    failure counts it returns are summed and, if nonzero, reported on stdout.

    Parameters
    ----------
    analyses : list of ``AnalysisTask`` objects
        A list of all analysis tasks

    verbose : bool
        Whether to write out a full stack trace when exceptions occur during
        ``setup_and_check()`` calls for each task

    Returns
    -------
    analysesToGenerate : ``OrderedDict`` of ``AnalysisTask`` objects
        A dictionary of analysis tasks to run
    """
    # Authors
    # -------
    # Xylar Asay-Davis

    totalFailures = 0

    print('')

    analysesToGenerate = OrderedDict()
    # check which analysis we actually want to generate and only keep those
    for analysisTask in analyses:
        # update the dictionary with this task and perhaps its subtasks
        totalFailures += add_task_and_subtasks(analysisTask,
                                               analysesToGenerate, verbose)

    if totalFailures > 0:
        print(f'\n{totalFailures} tasks and subtasks failed during setup.')
        if not verbose:
            print('To find out why these tasks are failing, use the --verbose '
                  'flag')

    print('')

    return analysesToGenerate


def add_task_and_subtasks(analysisTask, analysesToGenerate, verbose,
                          callCheckGenerate=True):

    """
    If a task has been requested through the generate config option or
    if it is a prerequisite of a requested task, add it to the dictionary of
    tasks to generate.

    Prerequisites are added first, then the task itself is set up with
    ``setup_and_check()``, then its subtasks are added.  The task is stored
    in ``analysesToGenerate`` only if all of these steps succeeded; its
    ``_setupStatus`` attribute is set to ``'success'`` or ``'fail'``
    accordingly.

    Parameters
    ----------
    analysisTask : ``AnalysisTask``
        A task to be added

    analysesToGenerate : ``OrderedDict`` of ``AnalysisTask``
        The list of analysis tasks to be generated, which this call may
        update to include this task and its subtasks

    verbose : bool
        Whether to write out a full stack trace when exceptions occur during
        ``setup_and_check()`` calls for each task

    callCheckGenerate : bool
        Whether the ``check_generate`` method should be call for this task to
        see if it has been requested.  We skip this for subtasks and
        prerequisites, since they are needed by another task regardless of
        whether the user specifically requested them.

    Returns
    -------
    totalFailures : int
        The number of tasks (this task, its prerequisites and its subtasks)
        that failed during setup; ``0`` if the task was added, was already
        present or was not requested

    Raises
    ------
    ValueError
        If a task with the same ``(taskName, subtaskName)`` key has already
        been added but ``analysisTask`` itself has not been set up
        successfully
    """
    # Authors
    # -------
    # Xylar Asay-Davis

    totalFailures = 0

    key = (analysisTask.taskName, analysisTask.subtaskName)
    if key in analysesToGenerate:
        # The task was already added
        if analysisTask._setupStatus != 'success':
            raise ValueError(
                "task {} already added but this version was not set up "
                "successfully. Typically, this indicates two tasks "
                "with the same full name".format(analysisTask.fullTaskName))
        return totalFailures

    # for each analysis task, check if we want to generate this task
    # and if the analysis task has a valid configuration
    taskTitle = analysisTask.printTaskName
    if callCheckGenerate and not analysisTask.check_generate():
        # we don't need to add this task -- it wasn't requested
        return totalFailures

    # first, we should try to add the prerequisites of this task and its
    # subtasks (if they aren't also subtasks for this task).  We work on a
    # copy so the task's own runAfterTasks list is left untouched, and each
    # prerequisite is listed only once.
    prereqs = list(analysisTask.runAfterTasks)
    for subtask in analysisTask.subtasks:
        for prereq in subtask.runAfterTasks:
            if prereq not in analysisTask.subtasks and \
                    prereq not in prereqs:
                prereqs.append(prereq)

    for prereq in prereqs:
        failureCount = add_task_and_subtasks(prereq, analysesToGenerate,
                                             verbose, callCheckGenerate=False)
        totalFailures += failureCount
        if prereq._setupStatus != 'success':
            assert failureCount > 0
            # a prereq failed setup_and_check
            print("Warning: prerequisite of {} failed during check, "
                  "so this task will not be run".format(
                      taskTitle))
            analysisTask._setupStatus = 'fail'
            totalFailures += 1
            return totalFailures

    # make sure all prereqs have been set up successfully before trying to
    # set up this task -- this task's setup may depend on setup in the prereqs
    try:
        analysisTask.setup_and_check()
    except KeyboardInterrupt:
        # let the user abort the whole run rather than reporting the
        # interrupt as a failed task
        raise
    except BaseException:
        # deliberately broad: any other failure during setup only disables
        # this task instead of stopping the run
        if verbose:
            traceback.print_exc(file=sys.stdout)
        print("Warning: {} failed during check and will not be run".format(
            taskTitle))
        analysisTask._setupStatus = 'fail'
        totalFailures += 1
        return totalFailures

    # next, we should try to add the subtasks.  This is done after the current
    # analysis task has been set up in case subtasks depend on information
    # from the parent task
    for subtask in analysisTask.subtasks:
        failureCount = add_task_and_subtasks(subtask, analysesToGenerate,
                                             verbose, callCheckGenerate=False)
        totalFailures += failureCount
        if subtask._setupStatus != 'success':
            assert failureCount > 0
            # a subtask failed setup_and_check
            print("Warning: subtask of {} failed during check, "
                  "so this task will not be run".format(
                      taskTitle))
            analysisTask._setupStatus = 'fail'
            totalFailures += 1
            return totalFailures

    analysesToGenerate[key] = analysisTask
    analysisTask._setupStatus = 'success'
    assert totalFailures == 0
    return totalFailures


def update_generate(config, generate):
    """
    Update the 'generate' config option using a string from the command line.

    The comma-separated string is turned into the text of a list of quoted
    strings (e.g. ``"a,b"`` becomes ``"['a', 'b']"``) and stored as a user
    option ``generate`` in the ``output`` section.  Whitespace around each
    flag is removed so that ``"a, b"`` and ``"a,b"`` are equivalent.

    Parameters
    ----------
    config : mpas_tools.config.MpasConfigParser
        contains config options

    generate : str
        a comma-separated string of generate flags: either names of analysis
        tasks or commands of the form ``all_<tag>`` or ``no_<tag>`` indicating
        that analysis with a given tag should be included or excluded).
    """
    # Authors
    # -------
    # Xylar Asay-Davis

    # overwrite the 'generate' in config with a string that parses to
    # a list of string
    generateList = [element.strip() for element in generate.split(',')]
    generateString = ', '.join(f"'{element}'" for element in generateList)
    config.set('output', 'generate', f'[{generateString}]', user=True)


def run_analysis(config, analyses):
    """
    Run all the tasks, either in serial or in parallel

    The complete config is first written to the logs directory.  Tasks are
    then run in a loop until none are left unfinished: blocked tasks are
    promoted to ready (or marked as failed) based on the status of their
    prerequisites and subtasks, and ready tasks are launched.  Tasks are run
    in parallel if the ``parallelTaskCount`` option in the ``execute``
    section is greater than 1 and there is more than one task.

    The process exits with status 1 (via ``sys.exit``) as soon as a task
    fails in serial mode, or at the end of the run if any launched task
    failed in parallel mode.

    Parameters
    ----------
    config : mpas_tools.config.MpasConfigParser
        contains config options

    analyses : OrderedDict of ``AnalysisTask`` objects
        A dictionary of analysis tasks to run with (task, subtask) names as
        keys
    """
    # Authors
    # -------
    # Xylar Asay-Davis

    # write the config file to the log directory
    logsDirectory = build_config_full_path(config, 'output',
                                           'logsSubdirectory')

    mainRunName = config.get('runs', 'mainRunName')
    maxTitleLength = config.getint('plot', 'maxTitleLength')

    if len(mainRunName) > maxTitleLength:
        print('Warning: The main run name is quite long and will be '
              'truncated in some plots: \n{}\n\n'.format(mainRunName))

    configFileName = '{}/complete.{}.cfg'.format(logsDirectory, mainRunName)

    # the context manager closes the file even if writing fails
    with open(configFileName, 'w') as configFile:
        config.write(configFile)

    parallelTaskCount = config.getint('execute', 'parallelTaskCount')

    isParallel = parallelTaskCount > 1 and len(analyses) > 1

    # tasks with nothing to wait for can run right away; all others start
    # out blocked
    for analysisTask in analyses.values():
        if not analysisTask.runAfterTasks and not analysisTask.subtasks:
            analysisTask._runStatus.value = AnalysisTask.READY
        else:
            analysisTask._runStatus.value = AnalysisTask.BLOCKED

    tasksWithErrors = []
    # tasks that have been started as processes, with the same keys as
    # ``analyses``
    runningTasks = {}

    # redirect output to a log file
    logsDirectory = build_config_full_path(config, 'output',
                                           'logsSubdirectory')

    logFileName = '{}/taskProgress.log'.format(logsDirectory)

    logger = logging.getLogger('mpas_analysis')
    handler = logging.FileHandler(logFileName)

    formatter = AnalysisFormatter()
    handler.setFormatter(formatter)
    logger.addHandler(handler)
    logger.setLevel(logging.INFO)
    logger.propagate = False

    totalTaskCount = len(analyses)
    widgets = ['Running tasks: ', progressbar.Percentage(), ' ',
               progressbar.Bar(), ' ', progressbar.ETA()]
    progress = progressbar.ProgressBar(widgets=widgets,
                                       max_value=totalTaskCount).start()

    runningProcessCount = 0

    # run each analysis task
    while True:
        # we still have tasks to run
        for analysisTask in analyses.values():
            if analysisTask._runStatus.value == AnalysisTask.BLOCKED:
                prereqs = analysisTask.runAfterTasks + analysisTask.subtasks
                prereqStatus = [prereq._runStatus.value for prereq in prereqs]
                if any([runStatus == AnalysisTask.FAIL for runStatus in
                        prereqStatus]):
                    # a prerequisite failed so this task cannot succeed
                    analysisTask._runStatus.value = AnalysisTask.FAIL
                if all([runStatus == AnalysisTask.SUCCESS for runStatus in
                        prereqStatus]):
                    # no unfinished prerequisites so we can run this task
                    analysisTask._runStatus.value = AnalysisTask.READY

        unfinishedCount = 0
        for analysisTask in analyses.values():
            if analysisTask._runStatus.value not in [AnalysisTask.SUCCESS,
                                                     AnalysisTask.FAIL]:
                unfinishedCount += 1

        progress.update(totalTaskCount - unfinishedCount)

        if unfinishedCount <= 0 and runningProcessCount == 0:
            # we're done
            break

        # launch new tasks
        runDirectly = False
        for key, analysisTask in analyses.items():
            if analysisTask._runStatus.value == AnalysisTask.READY:
                if isParallel:
                    newProcessCount = runningProcessCount + \
                        analysisTask.subprocessCount
                    if newProcessCount > parallelTaskCount and \
                            runningProcessCount > 0:
                        # this task should run next but we need to wait for
                        # more processes to finish
                        break

                    logger.info('Running {}'.format(
                        analysisTask.printTaskName))
                    if analysisTask.runDirectly:
                        # run in this process instead of starting a new one;
                        # ``analysisTask`` keeps referring to this task
                        # after the ``break`` for the status check below
                        analysisTask.run(writeLogFile=True)
                        runDirectly = True
                        break
                    else:
                        analysisTask._runStatus.value = AnalysisTask.RUNNING
                        analysisTask.start()
                        runningTasks[key] = analysisTask
                        runningProcessCount = newProcessCount
                        if runningProcessCount >= parallelTaskCount:
                            # don't try to run any more tasks
                            break
                else:
                    # serial mode: run one task per pass of the outer loop
                    analysisTask.run(writeLogFile=False)
                    break

        if isParallel:

            if not runDirectly:
                assert runningProcessCount > 0
                # wait for a task to finish
                analysisTask = wait_for_task(runningTasks)
                key = (analysisTask.taskName, analysisTask.subtaskName)
                runningTasks.pop(key)
                runningProcessCount -= analysisTask.subprocessCount

            taskTitle = analysisTask.printTaskName

            if analysisTask._runStatus.value == AnalysisTask.SUCCESS:
                logger.info("   Task {} has finished successfully.".format(
                    taskTitle))
            elif analysisTask._runStatus.value == AnalysisTask.FAIL:
                message = "ERROR in task {}.  See log file {} for " \
                          "details".format(taskTitle,
                                           analysisTask._logFileName)
                logger.error(message)
                print(message)
                tasksWithErrors.append(taskTitle)
            else:
                message = "Unexpected status from in task {}.  This may be " \
                          "a bug.".format(taskTitle)
                logger.error(message)
                print(message)
        else:
            # ``analysisTask`` is whatever the launch loop above last
            # iterated over: the task that was just run, if any
            if analysisTask._runStatus.value == AnalysisTask.FAIL:
                sys.exit(1)

    progress.finish()

    # blank line to make sure remaining output is on a new line
    print('')

    handler.close()
    logger.handlers = []

    # exit with an error status if any task failed
    errorCount = len(tasksWithErrors)
    if errorCount == 1:
        print("There were errors in task {}".format(tasksWithErrors[0]))
        sys.exit(1)
    elif errorCount > 0:
        print("There were errors in {} tasks: {}".format(
            errorCount, ', '.join(tasksWithErrors)))
        print("See log files in {} for details.".format(logsDirectory))
        print("The following commands may be helpful:")
        print("  cd {}".format(logsDirectory))
        print("  grep Error *.log")
        sys.exit(1)
    else:
        print('Log files for executed tasks can be found in {}'.format(
            logsDirectory))


def wait_for_task(runningTasks, timeout=0.1):
    """
    Wait until one of the running tasks has finished and return it.

    The tasks are polled in turn: each one is joined for at most ``timeout``
    and the first task found to be no longer alive is returned.  The task is
    not removed from ``runningTasks``.

    Parameters
    ----------
    runningTasks : dict of ``AnalysisTasks``
        The tasks that are currently running.  Only the values are used;
        each must provide ``join(timeout=...)`` and ``is_alive()``.

    timeout : float, optional
        The timeout passed to each ``join()`` call

    Returns
    -------
    analysisTask : ``AnalysisTasks``
        A task that finished

    Raises
    ------
    ValueError
        If ``runningTasks`` is empty, since there is nothing to wait for and
        the polling loop would otherwise never end
    """
    # Authors
    # -------
    # Xylar Asay-Davis

    if not runningTasks:
        raise ValueError('wait_for_task() requires at least one running task')

    # necessary to have a timeout so we can kill the whole thing
    # with a keyboard interrupt
    while True:
        for analysisTask in runningTasks.values():
            analysisTask.join(timeout=timeout)
            if not analysisTask.is_alive():
                return analysisTask


def purge_output(config):
    """
    Delete the output subdirectories of a previous analysis run.

    If the base output directory (option ``baseDirectory`` in the ``output``
    section) does not exist, a message is printed and nothing else happens.
    Otherwise, each known output subdirectory that exists is removed: a
    symlink is unlinked, anything else is deleted with ``shutil.rmtree``.

    Parameters
    ----------
    config : mpas_tools.config.MpasConfigParser
        contains config options
    """

    def remove_if_present(path):
        # remove ``path`` (a symlink or a directory tree) if it exists
        if not os.path.exists(path):
            return
        print('Deleting contents of {}'.format(path))
        if os.path.islink(path):
            os.unlink(path)
        else:
            shutil.rmtree(path)

    outputDirectory = config.get('output', 'baseDirectory')
    if not os.path.exists(outputDirectory):
        print('Output directory {} does not exist.\n'
              'No purge necessary.'.format(outputDirectory))
        return

    # subdirectories whose locations are given in the 'output' section
    outputNames = ('plots', 'logs', 'mpasClimatology', 'mapping',
                   'timeSeries', 'html', 'mask', 'profiles', 'histogram')
    for name in outputNames:
        remove_if_present(build_config_full_path(
            config=config, section='output',
            relativePathOption='{}Subdirectory'.format(name)))

    # subdirectories whose relative paths are given in the observations
    # section of each component
    for component in ('ocean', 'seaIce'):
        observationsSection = '{}Observations'.format(component)
        for name in ('climatology', 'remappedClim'):
            remove_if_present(build_config_full_path(
                config=config, section='output',
                relativePathOption='{}Subdirectory'.format(name),
                relativePathSection=observationsSection))


def build_config(user_config_file, shared_configs, machine_info):
    """
    Assemble a config parser for a run (either main or control).

    The shared config files are added first, skipping any whose name ends in
    ``.py``, and the user config file is added last.  If ``machine_info`` is
    given, its ``username`` is stored as the ``username`` option of the
    ``web_portal`` section.

    Parameters
    ----------
    user_config_file : str
        path to the user's config file, which must exist

    shared_configs : iterable of str
        paths of the shared config files

    machine_info : object or None
        if not ``None``, the source of the ``username`` attribute

    Returns
    -------
    config : mpas_tools.config.MpasConfigParser
        the combined config options

    Raises
    ------
    OSError
        If ``user_config_file`` does not exist
    """
    user_file_found = os.path.exists(user_config_file)
    if not user_file_found:
        raise OSError(f'A config file {user_config_file} was specified but '
                      f'the file does not exist')

    parser = MpasConfigParser()
    # config options set in python files are skipped
    for shared_file in (name for name in shared_configs
                        if not name.endswith('.py')):
        parser.add_from_file(shared_file)
    parser.add_user_config(user_config_file)

    if machine_info is None:
        return parser

    parser.set('web_portal', 'username', machine_info.username)
    return parser


def symlink_main_run(config, shared_configs, machine_info):
    """
    Link output directories of an already computed main run into the output
    of the current run so that the analysis they contain does not need to be
    recomputed.

    A link is only created where the directory does not yet exist for the
    current run but does exist for the main run.

    Parameters
    ----------
    config : mpas_tools.config.MpasConfigParser
        Config options for the current run; the ``mainRunConfigFile`` option
        in the ``runs`` section gives the config file of the main run

    shared_configs : list of str
        Paths of the shared config files used to build the main run's config

    machine_info : object or None
        Machine info passed on to ``build_config()``
    """
    main_config_file = config.get('runs', 'mainRunConfigFile')
    main_config = build_config(main_config_file, shared_configs, machine_info)

    # (section, option) pairs for each directory that may be linked, first
    # those from the output section and then those for the observations of
    # each component
    targets = [('output', f'{name}Subdirectory')
               for name in ['mpasClimatology', 'timeSeries', 'mapping',
                            'mask', 'profiles']]
    targets.extend((f'{component}Observations', f'{name}Subdirectory')
                   for component in ['ocean', 'seaIce']
                   for name in ['climatology', 'remappedClim'])

    for section, option in targets:
        link_path = build_config_full_path(config=config,
                                           section='output',
                                           relativePathOption=option,
                                           relativePathSection=section)
        if os.path.exists(link_path):
            # the current run already has this directory (or link)
            continue

        main_path = build_config_full_path(config=main_config,
                                           section='output',
                                           relativePathOption=option,
                                           relativePathSection=section)
        if not os.path.exists(main_path):
            # the main run has nothing to link to
            continue

        # make sure the parent of the link exists before creating the link
        parent = os.path.split(link_path)[0]
        make_directories(parent)
        os.symlink(main_path, link_path)


def main():
    """
    Entry point for the main script ``mpas_analysis``

    Parses the command-line arguments, combines the default, machine-specific,
    polar-regions and user config options, and then (unless only a listing of
    tasks or a plot of colormaps was requested) sets up the analysis tasks,
    runs them and generates the HTML output.

    Raises
    ------
    ValueError
        If a machine name was given or detected but ``mache`` has no config
        file for it

    OSError
        If a config file given on the command line does not exist
    """

    parser = argparse.ArgumentParser(
        description=__doc__, formatter_class=argparse.RawTextHelpFormatter)
    parser.add_argument('-v', '--version',
                        action='version',
                        version='mpas_analysis {}'.format(
                                mpas_analysis.__version__),
                        help="Show version number and exit")
    parser.add_argument("--setup_only", dest="setup_only", action='store_true',
                        help="If only the setup phase, not the run or HTML "
                        "generation phases, should be executed.")
    parser.add_argument("--html_only", dest="html_only", action='store_true',
                        help="If only the setup and HTML generation phases, "
                        "not the run phase, should be executed.")
    parser.add_argument("-g", "--generate", dest="generate",
                        help="A list of analysis modules to generate "
                        "(nearly identical generate option in config file).",
                        metavar="ANALYSIS1[,ANALYSIS2,ANALYSIS3,...]")
    parser.add_argument("-l", "--list", dest="list", action='store_true',
                        help="List the available analysis tasks")
    parser.add_argument("-p", "--purge", dest="purge", action='store_true',
                        help="Purge the analysis by deleting the output "
                        "directory before running")
    parser.add_argument("config_file", metavar="CONFIG", type=str, nargs='*',
                        help="config file")
    parser.add_argument("--plot_colormaps", dest="plot_colormaps",
                        action='store_true',
                        help="Make a plot displaying all available colormaps")
    parser.add_argument("--verbose", dest="verbose", action='store_true',
                        help="Verbose error reporting during setup-and-check "
                             "phase")
    parser.add_argument("-m", "--machine", dest="machine",
                        help="The name of the machine for loading machine-"
                             "related config options", metavar="MACH")
    parser.add_argument("--polar_regions", dest="polar_regions",
                        action='store_true',
                        help="Include config options for analysis focused on "
                             "polar regions")
    args = parser.parse_args()

    # with no arguments at all, there is nothing to do but show the usage
    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit(0)

    config = MpasConfigParser()

    # add default.cfg to cover default not included in the config files
    # provided on the command line
    config.add_from_package('mpas_analysis', 'default.cfg')

    # Add config options for E3SM supported machines from the mache package.
    # The machine comes from the command line, then the E3SMU_MACHINE
    # environment variable, then automatic discovery
    machine = args.machine

    if machine is None and 'E3SMU_MACHINE' in os.environ:
        machine = os.environ['E3SMU_MACHINE']

    if machine is None:
        machine = discover_machine()

    if machine is not None:
        print(f'Detected E3SM supported machine: {machine}')
        try:
            config.add_from_package('mache.machines', f'{machine}.cfg')
        except FileNotFoundError as err:
            # list the machines that mache does know about in the error.
            # The loop variable must not be called ``config`` or it would
            # shadow the config parser
            possible_machines = []
            machine_configs = contents('mache.machines')
            for machine_config in machine_configs:
                if machine_config.endswith('.cfg'):
                    possible_machines.append(
                        os.path.splitext(machine_config)[0])

            possible_machines = '\n  '.join(sorted(possible_machines))
            raise ValueError(
                f'We could not find the machine: {machine}.\n'
                f'Possible machines are:\n  {possible_machines}') from err

        try:
            config.add_from_package('mpas_analysis.configuration',
                                    f'{machine}.cfg')
        except FileNotFoundError:
            # we don't have a config file for this machine, so we'll just
            # skip it.
            print(f'Warning: no MPAS-Analysis config file found for machine:'
                  f' {machine}')

    if args.polar_regions:
        config.add_from_package('mpas_analysis', 'polar_regions.cfg')

    if machine is not None:
        # set the username so we can use it in the htmlSubdirectory
        machine_info = MachineInfo(machine=machine)
        config.set('web_portal', 'username', machine_info.username)
    else:
        machine_info = None

    # the config files added so far are shared with the control and main
    # runs; the user config files added below are specific to this run
    shared_configs = config.list_files()

    for user_config in args.config_file:
        if not os.path.exists(user_config):
            raise OSError(f'Config file {user_config} not found.')

        config.add_user_config(user_config)

    print('Using the following config files:')
    for config_file in config.list_files():
        print(f'   {config_file}')

    if args.list:
        # set this config option so we don't have issues
        config.set('diagnostics', 'baseDirectory', '')
        analyses = build_analysis_list(config, controlConfig=None)
        for analysisTask in analyses:
            print('task: {}'.format(analysisTask.taskName))
            print('    component: {}'.format(analysisTask.componentName))
            print('    tags: {}'.format(', '.join(analysisTask.tags)))
        sys.exit(0)

    if args.plot_colormaps:
        register_custom_colormaps()
        _plot_color_gradients()
        sys.exit(0)

    if config.has_option('runs', 'controlRunConfigFile'):
        control_config_file = config.get('runs', 'controlRunConfigFile')
        control_config = build_config(control_config_file, shared_configs,
                                      machine_info)

        # replace the log directory so log files get written to this run's
        # log directory, not the control run's
        logs_dir = build_config_full_path(config, 'output', 'logsSubdirectory')

        control_config.set('output', 'logsSubdirectory', logs_dir)

        print('Comparing to control run {} rather than observations. \n'
              'Make sure that MPAS-Analysis has been run previously with the '
              'control config file.'.format(control_config.get('runs',
                                                               'mainRunName')))
    else:
        control_config = None

    if args.purge:
        purge_output(config)

    if config.has_option('runs', 'mainRunConfigFile'):
        symlink_main_run(config, shared_configs, machine_info)

    if args.generate:
        update_generate(config, args.generate)

    if control_config is not None:
        # we want to use the "generate" option from the current run, not
        # the control config file
        control_config.set('output', 'generate', config.get('output',
                                                            'generate'))

    log_dir = build_config_full_path(config, 'output', 'logsSubdirectory')
    make_directories(log_dir)

    update_time_bounds_in_config(config)

    file_cache_maxsize = config.getint('input', 'file_cache_maxsize')
    try:
        xarray.set_options(file_cache_maxsize=file_cache_maxsize)
    except ValueError:
        # xarray version doesn't support file_cache_maxsize yet...
        pass

    start_time = time.time()

    # the user config files plus those of the control and main runs (if any)
    # are copied to the HTML directory and passed on to generate_html()
    custom_config_files = list(args.config_file)
    for option in ['controlRunConfigFile', 'mainRunConfigFile']:
        if config.has_option('runs', option):
            custom_config_files.append(config.get('runs', option))

    html_base_directory = build_config_full_path(config, 'output',
                                                 'htmlSubdirectory')
    make_directories(html_base_directory)
    for config_filename in custom_config_files:
        config_filename = os.path.abspath(config_filename)
        print(f'copying {config_filename} to HTML dir.')
        basename = os.path.basename(config_filename)
        copyfile(config_filename, f'{html_base_directory}/{basename}')

    analyses = build_analysis_list(config, control_config)
    analyses = determine_analyses_to_generate(analyses, args.verbose)

    setup_duration = time.time() - start_time

    if not args.setup_only and not args.html_only:
        run_analysis(config, analyses)
        # measured from the same start time as the setup, so the run time
        # reported below includes the setup time
        run_duration = time.time() - start_time
        m, s = divmod(setup_duration, 60)
        h, m = divmod(int(m), 60)
        print('Total setup time: {}:{:02d}:{:05.2f}'.format(h, m, s))
        m, s = divmod(run_duration, 60)
        h, m = divmod(int(m), 60)
        print('Total run time: {}:{:02d}:{:05.2f}'.format(h, m, s))

    if not args.setup_only:
        generate_html(config, analyses, control_config, custom_config_files)


# Call main() only when this file is executed as a script, not when it is
# imported as a module
if __name__ == "__main__":
    main()
Source code for mpas_analysis.__main__

Source code for mpas_analysis.main