# Source code for mpas_tools.io

import os
import subprocess
import sys
from datetime import datetime
from pathlib import Path

import netCDF4
import numpy

from mpas_tools.logging import check_call

# Default NetCDF file format passed to xarray.Dataset.to_netcdf(); callers of
# write_netcdf() can override it per call or modify this module-level default
default_format = 'NETCDF3_64BIT'
# Default xarray engine; None lets xarray choose based on the format
default_engine = None
# Default name of the dimension used for character-string variables
default_char_dim_name = 'StrLen'
# Default fill values by NetCDF type code (e.g. 'f8', 'i4'), from netCDF4
default_fills = netCDF4.default_fillvals
# Default width (number of characters) for string variables
default_nchar = 64


def write_netcdf(
    ds,
    fileName,
    fillValues=None,
    format=None,
    engine=None,
    char_dim_name=None,
    logger=None,
    nchar=None,
):
    """
    Write an xarray.Dataset to a file with NetCDF4 fill values and the given
    name of the string dimension.  Also adds the time and command-line to the
    history attribute.

    Note: the ``NETCDF3_64BIT_DATA`` format is handled as a special case
    because xarray output with this format is not performant.  First, the file
    is written in ``NETCDF4`` format, which supports larger files and
    variables.  Then, the ``ncks`` command is used to convert the file to the
    ``NETCDF3_64BIT_DATA`` format.

    Note: All int64 variables are automatically converted to int32 for MPAS
    compatibility.

    Parameters
    ----------
    ds : xarray.Dataset
        The dataset to save

    fileName : str
        The path for the NetCDF file to write

    fillValues : dict, optional
        A dictionary of fill values for different NetCDF types.  Default is
        ``mpas_tools.io.default_fills``, which can be modified but which
        defaults to ``netCDF4.default_fillvals``

    format : {'NETCDF4', 'NETCDF4_CLASSIC', 'NETCDF3_64BIT', \
'NETCDF3_64BIT_DATA', 'NETCDF3_CLASSIC'}, optional
        The NetCDF file format to use.  Default is
        ``mpas_tools.io.default_format``, which can be modified but which
        defaults to ``'NETCDF3_64BIT'``

    engine : {'netcdf4', 'scipy', 'h5netcdf'}, optional
        The library to use for NetCDF output.  The default is the same as
        in :py:meth:`xarray.Dataset.to_netcdf` and depends on ``format``.
        You can override the default by setting
        ``mpas_tools.io.default_engine``

    char_dim_name : str, optional
        The name of the dimension used for character strings.  Default is
        ``mpas_tools.io.default_char_dim_name``, which can be modified but
        which defaults to ``'StrLen'``

    logger : logging.Logger, optional
        A logger to write the output of ``ncks`` conversion calls to.  If
        None, ``ncks`` output is suppressed.  This is only relevant if
        ``format`` is 'NETCDF3_64BIT_DATA'

    nchar : int, optional
        The number of characters to use for string variables.  If None, the
        default is ``mpas_tools.io.default_nchar``, which can be modified but
        which defaults to 64.
    """  # noqa: E501
    if format is None:
        format = default_format
    if fillValues is None:
        fillValues = default_fills
    if engine is None:
        engine = default_engine
    if char_dim_name is None:
        char_dim_name = default_char_dim_name
    if nchar is None:
        nchar = default_nchar

    numpyFillValues = {}
    for fillType in fillValues:
        # drop string fill values -- only numeric types get a _FillValue
        if not fillType.startswith('S'):
            numpyFillValues[numpy.dtype(fillType)] = fillValues[fillType]

    encodingDict = {}
    variableNames = list(ds.data_vars.keys()) + list(ds.coords.keys())
    for variableName in variableNames:
        var = ds[variableName]
        encodingDict[variableName] = {}
        dtype = var.dtype

        # Convert int64 variables to int32 for MPAS compatibility
        if dtype == numpy.int64:
            encodingDict[variableName]['dtype'] = 'int32'

        if dtype in numpyFillValues:
            if numpy.any(numpy.isnan(var)):
                # only add fill values if they're needed
                fill = numpyFillValues[dtype]
            else:
                fill = None
            encodingDict[variableName]['_FillValue'] = fill

        isString = numpy.issubdtype(dtype, numpy.bytes_) or numpy.issubdtype(
            dtype, numpy.str_
        )
        if isString:
            # set the encoding for string variables: fixed-width byte strings
            # along the requested character dimension
            encodingDict[variableName].update(
                {'dtype': f'|S{nchar}', 'char_dim_name': char_dim_name}
            )

    update_history(ds)

    if 'Time' in ds.dims:
        # make sure the Time dimension is unlimited because MPAS has trouble
        # reading Time otherwise
        ds.encoding['unlimited_dims'] = {'Time'}
    else:
        ds.encoding['unlimited_dims'] = None

    # for performance, we have to handle NETCDF3_64BIT_DATA as a special
    # case: write NETCDF4 first, then convert with ncks
    convert = format == 'NETCDF3_64BIT_DATA'

    if convert:
        out_path = Path(fileName)
        out_filename = (
            out_path.parent / f'_tmp_{out_path.stem}.netcdf4{out_path.suffix}'
        )
        format = 'NETCDF4'
        if engine == 'scipy':
            # the scipy engine cannot write NETCDF4; fall back to netcdf4
            engine = 'netcdf4'
    else:
        out_filename = fileName

    ds.to_netcdf(
        out_filename, encoding=encodingDict, format=format, engine=engine
    )

    if convert:
        args = [
            'ncks',
            '-O',
            '-5',
            out_filename,
            fileName,
        ]
        # Ensure all args are strings (important for Path objects)
        args = [str(arg) for arg in args]
        try:
            if logger is None:
                subprocess.run(
                    args,
                    check=True,
                    stdout=subprocess.DEVNULL,
                    stderr=subprocess.DEVNULL,
                )
            else:
                check_call(args, logger=logger)
        finally:
            # always delete the temporary NETCDF4 file, even if the ncks
            # conversion fails, so a failed call doesn't leak temp files
            os.remove(out_filename)
def update_history(ds):
    """
    Prepend the current command line, tagged with a timestamp, to the
    ``history`` attribute of the given data set (creating the attribute if
    it does not exist yet).
    """
    stamp = datetime.now().strftime('%a %b %d %H:%M:%S %Y')
    entry = f'{stamp}: {" ".join(sys.argv)}'
    if 'history' in ds.attrs:
        # newest entry goes first, separated from older history by a newline
        entry = '\n'.join([entry, ds.attrs['history']])
    ds.attrs['history'] = entry