# Source code for mpas_analysis.shared.generalized_reader.generalized_reader

# This software is open source software available under the BSD-3 license.
#
# Copyright (c) 2022 Triad National Security, LLC. All rights reserved.
# Copyright (c) 2022 Lawrence Livermore National Security, LLC. All rights
# reserved.
# Copyright (c) 2022 UT-Battelle, LLC. All rights reserved.
#
# Additional copyright and license information can be found in the LICENSE file
# distributed with this code, or at
# https://raw.githubusercontent.com/MPAS-Dev/MPAS-Analysis/main/LICENSE
"""
Utility functions for importing MPAS files into xarray. These functions extend
the capabilities of mpas_xarray to include mapping variable names from MPAS
names to MPAS-Analysis generalized names and support for slicing to given
start and end dates.

open_multifile_dataset : opens a data set, maps variable names, preprocesses
    the data set, removes repeated time indices, and slices the time coordinate
    to lie between desired start and end dates.
"""
# Authors
# -------
# Xylar Asay-Davis

import xarray
from functools import partial
import resource

from mpas_analysis.shared.mpas_xarray import mpas_xarray
from mpas_analysis.shared.timekeeping.utility import \
    string_to_days_since_date, days_to_datetime


def open_multifile_dataset(fileNames, calendar, config,
                           simulationStartTime=None,
                           timeVariableName='Time',
                           variableList=None, selValues=None,
                           iselValues=None, variableMap=None,
                           startDate=None, endDate=None,
                           chunking=None):
    """
    Opens and returns an xarray data set given file name(s) and the MPAS
    calendar name.

    Parameters
    ----------
    fileNames : list of strings
        A list of file paths to read

    calendar : {``'gregorian'``, ``'noleap'``}
        The name of one of the calendars supported by MPAS cores

    config : mpas_tools.config.MpasConfigParser
        Contains configuration options

    simulationStartTime : string, optional
        The start date of the simulation, used to convert from time variables
        expressed as days since the start of the simulation to days since the
        reference date. ``simulationStartTime`` takes one of the following
        forms::

            0001-01-01
            0001-01-01 00:00:00

        ``simulationStartTime`` is only required if the MPAS time variable
        (identified by ``timeVariableName``) is a number of days since the
        start of the simulation.

    timeVariableName : string, optional
        The name of the time variable (typically ``'Time'`` if using a
        ``variableMap`` or ``'xtime'`` if not using a ``variableMap``)

    variableList : list of strings, optional
        If present, a list of variables to be included in the data set

    selValues : dict, optional
        A dictionary of coordinate names (keys) and values or arrays of
        values used to slice the variables in the data set.  See
        ``xarray.Dataset.sel()`` for details on how this dictionary is used.
        An example::

            selValues = {'cellLon': 180.0}

    iselValues : dict, optional
        A dictionary of coordinate names (keys) and indices, slices or
        arrays of indices used to slice the variables in the data set.  See
        ``xarray.Dataset.isel()`` for details on how this dictionary is used.
        An example::

            iselValues = {'nVertLevels': slice(0, 3),
                          'nCells': cellIDs}

    variableMap : dict, optional
        A dictionary with keys that are variable names used by
        MPAS-Analysis and values that are lists of possible names for the same
        variable in the MPAS dycore that produced the data set (which may
        differ between versions).

    startDate, endDate : string or datetime.datetime, optional
        If present, the first and last dates to be used in the data set.  The
        time variable is sliced to only include dates within this range.
        Either bound may be given on its own, in which case the range is
        left open on the other side.  Bounds given as strings are converted
        to days with ``string_to_days_since_date``.

    chunking : None, int, True, dict, optional
        If ``True``, the chunk size is read from the ``maxChunkSize`` option
        in the ``input`` section of ``config``.  Any other value (``None``,
        an integer or a dict) is passed unchanged to
        ``mpas_xarray.process_chunking``.

    Returns
    -------
    ds : ``xarray.Dataset``

    Raises
    ------
    TypeError
        If the time variable has an unsupported type (not a date string,
        a floating-point number of days since the start of the simulation
        or a ``numpy.datetime64`` object).

    ValueError
        If the time variable is not found in the data set or if the time
        variable is a number of days since the start of the simulation but
        simulationStartTime is None.  Also raised if the data set contains
        no ``Time`` entries after slicing to the requested range of dates.
    """
    # Authors
    # -------
    # Xylar Asay-Davis, Phillip J. Wolfram

    # bind everything except the data set itself so the result can be used
    # as the single-argument ``preprocess`` callback of open_mfdataset
    preprocess_partial = partial(_preprocess,
                                 calendar=calendar,
                                 simulationStartTime=simulationStartTime,
                                 timeVariableName=timeVariableName,
                                 variableList=variableList,
                                 selValues=selValues,
                                 iselValues=iselValues,
                                 variableMap=variableMap,
                                 startDate=startDate,
                                 endDate=endDate)

    ds = xarray.open_mfdataset(fileNames,
                               preprocess=preprocess_partial,
                               combine='nested', concat_dim='Time',
                               decode_times=False)

    ds = mpas_xarray.remove_repeated_time_index(ds)

    # convert each bound independently so that a range with only one bound
    # given as a string is still converted before slicing
    # NOTE(review): bounds that are not strings are passed to sel()
    # unchanged; presumably they must already be in the units of the Time
    # coordinate -- confirm
    if isinstance(startDate, str):
        startDate = string_to_days_since_date(dateString=startDate,
                                              calendar=calendar)
    if isinstance(endDate, str):
        endDate = string_to_days_since_date(dateString=endDate,
                                            calendar=calendar)

    if startDate is not None or endDate is not None:
        # select only the data in the specified range of dates
        ds = ds.sel(Time=slice(startDate, endDate))

    if ds.sizes['Time'] == 0:
        # only convert the bounds that were actually given; a missing bound
        # is reported as None instead of being handed to days_to_datetime
        bounds = [None if date is None else
                  days_to_datetime(date, calendar=calendar)
                  for date in (startDate, endDate)]
        raise ValueError('The data set contains no Time entries between '
                         'dates {} and {}.'.format(*bounds))

    # process chunking
    if chunking is True:
        # limit chunk size to prevent memory error
        chunking = config.getint('input', 'maxChunkSize')

    ds = mpas_xarray.process_chunking(ds, chunking)

    return ds


def _preprocess(ds, calendar, simulationStartTime, timeVariableName,
                variableList, selValues, iselValues, variableMap,
                startDate, endDate):
    """
    Performs variable remapping, then calls mpas_xarray.preprocess, to
    perform the remainder of preprocessing.

    Parameters
    ----------
    ds : xarray.Dataset object
        The data set containing an MPAS time variable to be used to build
        an xarray time coordinate and with variable names to be
        substituted.

    calendar : {'gregorian', 'noleap'}
        The name of one of the calendars supported by MPAS cores

    simulationStartTime : string
        The start date of the simulation, used to convert from time
        variables expressed as days since the start of the simulation to
        days since the reference date. `simulationStartTime` takes one
        of the following forms::

            0001-01-01
            0001-01-01 00:00:00

        simulationStartTime is only required if the MPAS time variable
        (identified by timeVariableName) is a number of days since the
        start of the simulation.

    timeVariableName : string
        The name of the time variable (typically 'Time' if using a
        variableMap or 'xtime' if not using a variableMap)

    variableList : list of strings
        If present, a list of variables to be included in the data set

    selValues : dict
        A dictionary of coordinate names (keys) and values or arrays of
        values used to slice the variables in the data set.  See
        xarray.Dataset.sel() for details on how this dictionary is used.
        An example::

            selValues = {'cellLon': 180.0}

    iselValues : dict
        A dictionary of coordinate names (keys) and indices, slices or
        arrays of indices used to slice the variables in the data set.  See
        xarray.Dataset.isel() for details on how this dictionary is used.
        An example::

            iselValues = {'nVertLevels': slice(0, 3),
                          'nCells': cellIDs}

    variableMap : dict
        A dictionary with keys that are variable names used by
        MPAS-Analysis and values that are lists of possible names for the same
        variable in the MPAS dycore that produced the data set (which may
        differ between versions).

    startDate, endDate : string or datetime.datetime
        Accepted so that the signature matches the keyword arguments bound
        by ``open_multifile_dataset``; they are not used in this function.

    Returns
    -------
    ds : xarray.Dataset object
        The data set returned by ``mpas_xarray.preprocess`` after the
        variables have been renamed.
    """
    # Authors
    # -------
    # Xylar Asay-Davis, Phillip J. Wolfram

    submap = variableMap

    # timeVariableName is a special case so we take it out of the map
    # and handle it manually (adding a new variable rather than renaming
    # an existing one)
    if variableMap is not None and timeVariableName in variableMap:
        # make a copy of variableMap and remove timeVariableName
        submap = variableMap.copy()
        submap.pop(timeVariableName, None)
        # temporarily change the time variable name
        timeVariableName = \
            _map_variable_name(timeVariableName, ds, variableMap)

    if submap is not None:
        ds = _rename_variables(ds, submap)

    # now that the variables are mapped, do the normal preprocessing in
    # mpas_xarray
    ds = mpas_xarray.preprocess(ds,
                                calendar=calendar,
                                simulationStartTime=simulationStartTime,
                                timeVariableName=timeVariableName,
                                variableList=variableList,
                                selValues=selValues,
                                iselValues=iselValues)

    return ds


def _map_variable_name(variableName, ds, variableMap):
    """
    Given a `variableName` in a `variableMap` and an xarray `ds`,
    return the first entry in `variableMap[variableName]` that is found
    in `ds`.

    An entry is either a single variable name or a list/tuple of names.
    A list/tuple entry matches only if every name in it is a data variable
    of `ds`, and it is returned as is.

    Parameters
    ----------
    variableName : string
        Name of a variable in `variableMap`

    ds : `xarray.Dataset` object
        A data set in which the mapped variable name should be found

    variableMap : dict
        A dictionary with keys that are variable names used by
        MPAS-Analysis and values that are lists of possible names for the same
        variable in the MPAS dycore that produced the data set (which may
        differ between versions).

    Returns
    -------
    mappedVariableName : string, list or tuple
        The entry of `variableMap[variableName]` found in `ds`.

    Raises
    ------
    ValueError
        If none of the possible variable names in `variableMap[variableName]`
        can be found in `ds`.
    """
    # Authors
    # -------
    # Xylar Asay-Davis

    possibleVariables = variableMap[variableName]
    dataVars = ds.data_vars
    for variable in possibleVariables:
        if isinstance(variable, (list, tuple)):
            # a group of sub-variables matches only if all are present
            if all(subvariable in dataVars for subvariable in variable):
                return variable
        elif variable in dataVars:
            return variable

    raise ValueError('Variable {} could not be mapped. None of the '
                     'possible mapping variables {}\n match any of the '
                     'variables in {}.'.format(
                         variableName, possibleVariables,
                         ds.data_vars.keys()))


def _rename_variables(ds, variableMap):
    """
    Given an `xarray.Dataset` object `ds` and a dictionary mapping
    variable names `variableMap`, returns a new data set in which variables
    from `ds` with names equal to values in `variableMap` are renamed
    to the corresponding key in `variableMap`.

    For each key, only the first listed name that is a data variable of
    `ds` is renamed, so two variables are never renamed to the same key.
    List/tuple entries (groups of sub-variables) are not renamed.

    Parameters
    ----------
    ds : `xarray.Dataset` object
        A data set in which the mapped variable names should be renamed

    variableMap : dict
        A dictionary with keys that are variable names used by
        MPAS-Analysis and values that are lists of possible names for the same
        variable in the MPAS dycore that produced the data set (which may
        differ between versions).  A single string is treated as a list
        with one name.

    Returns
    -------
    outDataSet : `xarray.Dataset` object
        The result of ``ds.rename()`` with the variables renamed.
    """
    # Authors
    # -------
    # Xylar Asay-Davis

    dataVars = ds.data_vars
    renameDict = {}
    for mapVariable, renameList in variableMap.items():
        if isinstance(renameList, str):
            # compare the whole name rather than iterating its characters
            renameList = [renameList]
        for datasetVariable in renameList:
            if isinstance(datasetVariable, (list, tuple)):
                # groups of sub-variables have no single variable to rename
                continue
            # a variable already claimed by an earlier key keeps that key
            if datasetVariable in dataVars and \
                    datasetVariable not in renameDict:
                renameDict[datasetVariable] = mapVariable
                # the first listed name wins; renaming a second variable to
                # the same key would make the new names conflict
                break

    return ds.rename(renameDict)

# vim: ai ts=4 sts=4 et sw=4 ft=python