# This software is open source software available under the BSD-3 license.
#
# Copyright (c) 2022 Triad National Security, LLC. All rights reserved.
# Copyright (c) 2022 Lawrence Livermore National Security, LLC. All rights
# reserved.
# Copyright (c) 2022 UT-Battelle, LLC. All rights reserved.
#
# Additional copyright and license information can be found in the LICENSE file
# distributed with this code, or at
# https://raw.githubusercontent.com/MPAS-Dev/MPAS-Analysis/main/LICENSE
"""
Utility functions for importing MPAS files into xarray. These functions extend
the capabilities of mpas_xarray to include mapping variable names from MPAS
names to MPAS-Analysis generalized names and support for slicing to given
start and end dates.
open_multifile_dataset : opens a data set, maps variable names, preprocesses
    the data set, removes repeated time indices, and slices the time
    coordinate to lie between the desired start and end dates.
"""
# Authors
# -------
# Xylar Asay-Davis
import xarray
from functools import partial
import resource
from mpas_analysis.shared.mpas_xarray import mpas_xarray
from mpas_analysis.shared.timekeeping.utility import \
    string_to_days_since_date, days_to_datetime
[docs]
def open_multifile_dataset(fileNames, calendar, config,
                           simulationStartTime=None,
                           timeVariableName='Time',
                           variableList=None, selValues=None,
                           iselValues=None, variableMap=None,
                           startDate=None, endDate=None,
                           chunking=None):
    """
    Opens and returns an xarray data set given file name(s) and the MPAS
    calendar name.

    Each file is run through ``_preprocess`` as it is opened, the files are
    concatenated along ``Time``, repeated time indices are removed, the
    result is sliced to the requested date range and finally chunked.

    Parameters
    ----------
    fileNames : list of strings
        A list of file paths to read

    calendar : {``'gregorian'``, ``'noleap'``}
        The name of one of the calendars supported by MPAS cores

    config : tranche.Tranche
        Contains configuration options.  It is only read when ``chunking``
        is ``True`` (option ``maxChunkSize`` in section ``input``).

    simulationStartTime : string, optional
        The start date of the simulation, used to convert from time variables
        expressed as days since the start of the simulation to days since the
        reference date. ``simulationStartTime`` takes one of the following
        forms::

            0001-01-01
            0001-01-01 00:00:00

        ``simulationStartTime`` is only required if the MPAS time variable
        (identified by ``timeVariableName``) is a number of days since the
        start of the simulation.

    timeVariableName : string, optional
        The name of the time variable (typically ``'Time'`` if using a
        ``variableMap`` or ``'xtime'`` if not using a ``variableMap``)

    variableList : list of strings, optional
        If present, a list of variables to be included in the data set

    selValues : dict, optional
        A dictionary of coordinate names (keys) and values or arrays of
        values used to slice the variables in the data set.  See
        ``xarray.Dataset.sel()`` for details on how this dictionary is used.
        An example::

            selValues = {'cellLon': 180.0}

    iselValues : dict, optional
        A dictionary of coordinate names (keys) and indices, slices or
        arrays of indices used to slice the variables in the data set.  See
        ``xarray.Dataset.isel()`` for details on how this dictionary is used.
        An example::

            iselValues = {'nVertLevels': slice(0, 3),
                          'nCells': cellIDs}

    variableMap : dict, optional
        A dictionary with keys that are variable names used by
        MPAS-Analysis and values that are lists of possible names for the same
        variable in the MPAS dycore that produced the data set (which may
        differ between versions).

    startDate, endDate : string or datetime.datetime, optional
        If present, the first and last dates to be used in the data set.  The
        time variable is sliced to only include dates within this range.
        Either bound may be omitted to leave that end of the range open.
        Strings are converted with ``string_to_days_since_date``; any other
        value is handed to the ``Time`` slice unchanged.

    chunking : None, int, True, dict, optional
        If ``True``, the chunk size is read from the config option
        ``maxChunkSize`` in section ``input``.  Any other value (``None``,
        an integer or a dict) is passed on unchanged to
        ``mpas_xarray.process_chunking``.

    Returns
    -------
    ds : ``xarray.Dataset``

    Raises
    ------
    TypeError
        If the time variable has an unsupported type (not a date string,
        a floating-point number of days since the start of the simulation
        or a ``numpy.datetime64`` object).

    ValueError
        If the time variable is not found in the data set, if the time
        variable is a number of days since the start of the simulation but
        simulationStartTime is None, or if no ``Time`` entries remain after
        slicing to the requested date range.
    """
    # Authors
    # -------
    # Xylar Asay-Davis, Phillip J. Wolfram

    preprocess_partial = partial(_preprocess,
                                 calendar=calendar,
                                 simulationStartTime=simulationStartTime,
                                 timeVariableName=timeVariableName,
                                 variableList=variableList,
                                 selValues=selValues,
                                 iselValues=iselValues,
                                 variableMap=variableMap,
                                 startDate=startDate,
                                 endDate=endDate)

    ds = xarray.open_mfdataset(fileNames,
                               preprocess=preprocess_partial,
                               combine='nested',
                               concat_dim='Time',
                               decode_times=False)

    ds = mpas_xarray.remove_repeated_time_index(ds)

    # Convert each bound on its own: a range that is open at one end (only
    # startDate or only endDate given) must still have its string bound
    # converted before it is used to slice Time.
    if isinstance(startDate, str):
        startDate = string_to_days_since_date(dateString=startDate,
                                              calendar=calendar)
    if isinstance(endDate, str):
        endDate = string_to_days_since_date(dateString=endDate,
                                            calendar=calendar)

    # select only the data in the specified range of dates (a bound of None
    # leaves that side of the slice open)
    ds = ds.sel(Time=slice(startDate, endDate))

    if ds.sizes['Time'] == 0:
        # only format the bounds that were actually supplied so that the
        # intended ValueError is raised even for an open-ended range
        bounds = [None if date is None
                  else days_to_datetime(date, calendar=calendar)
                  for date in (startDate, endDate)]
        raise ValueError('The data set contains no Time entries between '
                         'dates {} and {}.'.format(*bounds))

    # process chunking
    if chunking is True:
        # limit chunk size to prevent memory error
        chunking = config.getint('input', 'maxChunkSize')

    ds = mpas_xarray.process_chunking(ds, chunking)

    return ds
def _preprocess(ds, calendar, simulationStartTime, timeVariableName,
                variableList, selValues, iselValues, variableMap,
                startDate, endDate):
    """
    Renames variables according to ``variableMap`` and then hands the data
    set to ``mpas_xarray.preprocess`` for the remainder of preprocessing.

    Parameters
    ----------
    ds : xarray.Dataset object
        The data set containing an MPAS time variable to be used to build
        an xarray time coordinate and with variable names to be
        substituted.

    calendar : {'gregorian', 'noleap'}
        The name of one of the calendars supported by MPAS cores

    simulationStartTime : string
        The start date of the simulation, used to convert from time variables
        expressed as days since the start of the simulation to days since the
        reference date. `simulationStartTime` takes one of the following
        forms::

            0001-01-01
            0001-01-01 00:00:00

        simulationStartTime is only required if the MPAS time variable
        (identified by timeVariableName) is a number of days since the
        start of the simulation.

    timeVariableName : string
        The name of the time variable (typically 'Time' if using a
        variableMap or 'xtime' if not using a variableMap).  When this name
        is a key of ``variableMap`` it is replaced by the matching name
        found in ``ds`` before preprocessing.

    variableList : list of strings
        If present, a list of variables to be included in the data set

    selValues : dict
        A dictionary of coordinate names (keys) and values or arrays of
        values used to slice the variables in the data set.  See
        xarray.Dataset.sel() for details on how this dictionary is used.
        An example::

            selValues = {'cellLon': 180.0}

    iselValues : dict
        A dictionary of coordinate names (keys) and indices, slices or
        arrays of indices used to slice the variables in the data set.  See
        xarray.Dataset.isel() for details on how this dictionary is used.
        An example::

            iselValues = {'nVertLevels': slice(0, 3),
                          'nCells': cellIDs}

    variableMap : dict
        A dictionary with keys that are variable names used by
        MPAS-Analysis and values that are lists of possible names for the same
        variable in the MPAS dycore that produced the data set (which may
        differ between versions).  May be ``None``, in which case no
        renaming is performed.

    startDate, endDate : string or datetime.datetime
        Accepted for interface compatibility; they are not used in this
        function.

    Returns
    -------
    ds : xarray.Dataset object
        The data set returned by ``mpas_xarray.preprocess`` after the
        variables have been renamed.
    """
    # Authors
    # -------
    # Xylar Asay-Davis, Phillip J. Wolfram

    renameMap = variableMap
    resolvedTimeName = timeVariableName

    # The time variable is a special case: it is not renamed like the other
    # variables.  Instead its entry is left out of the map used for renaming
    # and the name actually present in the data set is looked up so that it
    # can be handed on to mpas_xarray.preprocess.
    if variableMap is not None and timeVariableName in variableMap:
        renameMap = {name: candidates
                     for name, candidates in variableMap.items()
                     if name != timeVariableName}
        resolvedTimeName = _map_variable_name(timeVariableName, ds,
                                              variableMap)

    if renameMap is not None:
        ds = _rename_variables(ds, renameMap)

    # with the variables mapped, mpas_xarray does the normal preprocessing
    return mpas_xarray.preprocess(ds,
                                  calendar=calendar,
                                  simulationStartTime=simulationStartTime,
                                  timeVariableName=resolvedTimeName,
                                  variableList=variableList,
                                  selValues=selValues,
                                  iselValues=iselValues)
def _map_variable_name(variableName, ds, variableMap):
    """
    Given a `variableName` in a `variableMap` and an xarray `ds`,
    return the name of the the first variable in `variableMap[variableName]`
    that is found in ds.
    variableMap is a dictionary with keys that are variable names used by
    MPAS-Analysis and values that are lists of possible names for the same
    variable in the MPAS dycore that produced the data set (which may differ
    between versions).
    Parameters
    ----------
    variableName : string
        Name of a variable in `varriableMap`
    ds : `xarray.DataSet` object
        A data set in which the mapped variable name should be found
    variableMap : dict
        A dictionary with keys that are variable names used by
        MPAS-Analysis and values that are lists of possible names for the same
        variable in the MPAS dycore that produced the data set (which may
        differ between versions).
    Returns
    -------
    mappedVariableName : The corresponding variable name to `variableName`
        found in `ds`.
    Raises
    ------
    ValueError
        If none of the possible variable names in `variableMap[variableName]`
        can be found in `ds`.
    """
    # Authors
    # -------
    # Xylar Asay-Davis
    possibleVariables = variableMap[variableName]
    for variable in possibleVariables:
        if isinstance(variable, (list, tuple)):
            allFound = True
            for subvariable in variable:
                if subvariable not in ds.data_vars.keys():
                    allFound = False
                    break
            if allFound:
                return variable
        elif variable in ds.data_vars.keys():
            return variable
    raise ValueError('Variable {} could not be mapped. None of the '
                     'possible mapping variables {}\n match any of the '
                     'variables in {}.'.format(
                         variableName, possibleVariables,
                         ds.data_vars.keys()))
def _rename_variables(ds, variableMap):
    """
    Given an `xarray.DataSet` object `ds` and a dictionary mapping
    variable names `variableMap`, returns a new data set in which variables
    from `ds` with names equal to values in `variableMap` are renamed
    to the corresponding key in `variableMap`.
    Parameters
    ----------
    ds : `xarray.DataSet` object
        A data set in which the mapped variable names should be renamed
    variableMap : dict
        A dictionary with keys that are variable names used by
        MPAS-Analysis and values that are lists of possible names for the same
        variable in the MPAS dycore that produced the data set (which may
        differ between versions).
    Returns
    -------
    outDataSEt : A new `xarray.DataSet` object with the variable renamed.
    """
    # Authors
    # -------
    # Xylar Asay-Davis
    renameDict = {}
    for datasetVariable in ds.data_vars:
        for mapVariable in variableMap:
            renameList = variableMap[mapVariable]
            if datasetVariable in renameList:
                renameDict[datasetVariable] = mapVariable
                break
    return ds.rename(renameDict)
# vim: ai ts=4 sts=4 et sw=4 ft=python