Source code for compass.validate

import os
import numpy
import xarray
import re
import fnmatch


def compare_variables(test_case, variables, filename1, filename2=None,
                      l1_norm=0.0, l2_norm=0.0, linf_norm=0.0, quiet=True,
                      check_outputs=True, skip_if_step_not_run=True):
    """
    Validate variables within a test case and/or against a baseline.

    Outcomes are accumulated in the test case's ``validation`` dictionary
    (keys ``internal_pass`` and ``baseline_pass``), which is stored back on
    ``test_case`` at the end of the call.

    Parameters
    ----------
    test_case : compass.TestCase
        The test case whose files are validated

    variables : list
        Names of the variables to compare

    filename1 : str
        Path of a file relative to the test case's ``work_dir``.  It is
        compared with ``filename2`` when that is given and, when the test
        case has a baseline directory, with the file at the same relative
        path under the baseline.

    filename2 : str, optional
        Path of a second file relative to ``work_dir`` for a comparison
        within the test case.  When the test case has a baseline directory,
        this file is also compared with its baseline counterpart.

    l1_norm : float, optional
        Largest allowed L1 norm of the difference between ``filename1`` and
        ``filename2``.  Pass ``None`` to skip this check.

    l2_norm : float, optional
        Largest allowed L2 norm of the difference between ``filename1`` and
        ``filename2``.  Pass ``None`` to skip this check.

    linf_norm : float, optional
        Largest allowed L-Infinity norm of the difference between
        ``filename1`` and ``filename2``.  Pass ``None`` to skip this check.

    quiet : bool, optional
        Passed on to the comparison; when ``False``, the tolerances and the
        computed norms are printed for every comparison.

    check_outputs : bool, optional
        Whether to require that the files are outputs of steps of this test
        case.  Use ``False`` when a file is produced by a step of another
        test case.

    skip_if_step_not_run : bool, optional
        Whether to return without validating when a step that produces one
        of the files is not in ``test_case.steps_to_run``.

    Raises
    ------
    ValueError
        If ``check_outputs`` is set and a file is not an output of any step
    """
    work_dir = test_case.work_dir
    logger = test_case.logger

    def _absolute(relative):
        # absolute location of a path given relative to the work directory
        return os.path.abspath(os.path.join(work_dir, relative))

    def _merge(previous, current):
        # fold a new result into a tri-state (None/True/False) flag
        if previous is None:
            return current
        return previous and current

    path1 = _absolute(filename1)
    path2 = None if filename2 is None else _absolute(filename2)

    # find the steps (if any) that list the files among their outputs;
    # outputs are already absolute paths combined with the step dir
    file1_found, step_name1 = False, None
    file2_found, step_name2 = False, None
    for step_name, step in test_case.steps.items():
        for output in step.outputs:
            if output == path1:
                file1_found, step_name1 = True, step_name
            if output == path2:
                file2_found, step_name2 = True, step_name

    if check_outputs:
        if not file1_found:
            raise ValueError(
                f'{filename1} does not appear to be an output of any step '
                f'in this test case.')
        if not (filename2 is None or file2_found):
            raise ValueError(
                f'{filename2} does not appear to be an output of any step '
                f'in this test case.')

    if skip_if_step_not_run:
        step1_not_run = (file1_found and
                         step_name1 not in test_case.steps_to_run)
        step2_not_run = (file2_found and
                         step_name2 not in test_case.steps_to_run)
        reason = None
        if step1_not_run and step2_not_run:
            reason = f"{step_name1} and {step_name2} weren't run"
        elif step1_not_run:
            reason = f"{step_name1} wasn't run"
        elif step2_not_run:
            reason = f"{step_name2} wasn't run"
        if reason is not None:
            logger.info(f'Skipping validation because {reason}')
            return

    validation = test_case.validation
    if validation is None:
        validation = {'internal_pass': None, 'baseline_pass': None}

    if filename2 is not None:
        internal_pass = _compare_variables(
            variables, path1, path2, l1_norm, l2_norm, linf_norm, quiet,
            logger)
        validation['internal_pass'] = _merge(validation['internal_pass'],
                                             internal_pass)

    baseline_root = test_case.baseline_dir
    if baseline_root is not None:
        relative_paths = [filename1]
        if filename2 is not None:
            relative_paths.append(filename2)

        baseline_pass = True
        for relative in relative_paths:
            # the baseline comparison always uses zero tolerances
            result = _compare_variables(
                variables, os.path.join(work_dir, relative),
                os.path.join(baseline_root, relative), l1_norm=0.0,
                l2_norm=0.0, linf_norm=0.0, quiet=quiet, logger=logger)
            baseline_pass = baseline_pass and result

        validation['baseline_pass'] = _merge(validation['baseline_pass'],
                                             baseline_pass)

    test_case.validation = validation
def compare_timers(test_case, timers, rundir1, rundir2=None):
    """
    Compare timers between run directories in the current test case and/or
    with the baseline results.

    Parameters
    ----------
    test_case : compass.TestCase
        The test case whose timers are compared

    timers : list
        Names of the timers to compare

    rundir1 : str
        Path of a run directory relative to the test case's ``work_dir``.
        Its timers are compared with those in ``rundir2`` when that is
        given and, when the test case has a baseline directory, with those
        in the same relative directory under the baseline.

    rundir2 : str, optional
        Path of a second run directory relative to ``work_dir`` for a
        comparison within the test case.  When the test case has a baseline
        directory, its timers are also compared with those in the
        corresponding baseline directory.
    """
    work_dir = test_case.work_dir
    baseline_root = test_case.baseline_dir

    # (base directory, comparison directory) pairs, in reporting order
    directory_pairs = []
    if rundir2 is not None:
        directory_pairs.append((os.path.join(work_dir, rundir1),
                                os.path.join(work_dir, rundir2)))

    if baseline_root is not None:
        for rundir in (rundir1, rundir2):
            if rundir is None:
                continue
            directory_pairs.append((os.path.join(baseline_root, rundir),
                                    os.path.join(work_dir, rundir)))

    for base_directory, comparison_directory in directory_pairs:
        _compute_timers(base_directory, comparison_directory, timers)
def _compare_variables(variables, filename1, filename2, l1_norm, l2_norm,
                       linf_norm, quiet, logger):
    """
    Compare fields in the two files.

    Parameters
    ----------
    variables : list
        Names of the variables to compare

    filename1, filename2 : str
        Paths of the two files to open with ``xarray.open_dataset``

    l1_norm, l2_norm, linf_norm : float or None
        Largest allowed norms of the difference; ``None`` skips that check

    quiet : bool
        When ``False``, the tolerances and computed norms are printed

    logger : logging.Logger
        Receives error messages about missing files, missing variables and
        mismatched dimensions

    Returns
    -------
    all_pass : bool
        ``False`` if a file or variable is missing, if dimensions differ or
        if any norm exceeds its tolerance; ``True`` otherwise
    """
    for filename in [filename1, filename2]:
        if not os.path.exists(filename):
            logger.error(f'File {filename} does not exist.')
            return False

    # ANSI fail text: https://stackoverflow.com/a/287944/7728169
    start_fail = '\033[91m'
    start_pass = '\033[92m'
    end = '\033[0m'
    pass_str = '{}PASS{}'.format(start_pass, end)
    fail_str = '{}FAIL{}'.format(start_fail, end)

    all_pass = True
    # the context managers make sure both files are closed again, also when
    # a comparison raises
    with xarray.open_dataset(filename1) as ds1, \
            xarray.open_dataset(filename2) as ds2:
        for variable in variables:
            all_found = True
            for ds, filename in [(ds1, filename1), (ds2, filename2)]:
                if variable not in ds:
                    logger.error(f'Variable {variable} not in {filename}.')
                    all_found = False
            if not all_found:
                all_pass = False
                continue

            da1 = ds1[variable]
            da2 = ds2[variable]

            if da1.dims != da2.dims:
                logger.error(
                    f"Dimensions for variable {variable} don't match "
                    f"between files {filename1} and {filename2}.")
                all_pass = False
                continue

            all_match = True
            for dim in da1.sizes:
                if da1.sizes[dim] != da2.sizes[dim]:
                    logger.error(
                        f"Field sizes for variable {variable} don't "
                        f"match files {filename1} and {filename2}.")
                    all_match = False
            if not all_match:
                all_pass = False
                continue

            if not quiet:
                print(" Pass thresholds are:")
                if l1_norm is not None:
                    print(" L1: {:16.14e}".format(l1_norm))
                if l2_norm is not None:
                    print(" L2: {:16.14e}".format(l2_norm))
                if linf_norm is not None:
                    print(" L_Infinity: {:16.14e}".format(linf_norm))

            variable_pass = True
            if 'Time' in da1.dims:
                # compare each time slice on its own
                time_range = range(0, da1.sizes['Time'])
                time_str = ', '.join(['{}'.format(j) for j in time_range])
                print('{} Time index: {}'.format(variable.ljust(20),
                                                 time_str))
                for time_index in time_range:
                    slice1 = da1.isel(Time=time_index)
                    slice2 = da2.isel(Time=time_index)
                    result = _compute_norms(slice1, slice2, quiet, l1_norm,
                                            l2_norm, linf_norm,
                                            time_index=time_index)
                    variable_pass = variable_pass and result
            else:
                print('{}'.format(variable))
                result = _compute_norms(da1, da2, quiet, l1_norm, l2_norm,
                                        linf_norm)
                variable_pass = variable_pass and result

            if variable_pass:
                print(' {} {}\n'.format(pass_str, filename1))
            else:
                print(' {} {}\n'.format(fail_str, filename1))
            print(' {}\n'.format(filename2))
            all_pass = all_pass and variable_pass

    return all_pass


def _compute_norms(da1, da2, quiet, max_l1_norm, max_l2_norm, max_linf_norm,
                   time_index=None):
    """
    Compute norms between variables in two DataArrays.

    Parameters
    ----------
    da1, da2 : xarray.DataArray
        The two fields to compare

    quiet : bool
        When ``False``, the norms are printed even if all checks pass

    max_l1_norm, max_l2_norm, max_linf_norm : float or None
        Largest allowed norms of the difference; ``None`` skips that check

    time_index : int, optional
        Index along ``Time`` used as a prefix of the printed norms

    Returns
    -------
    result : bool
        ``False`` if any checked norm exceeds its tolerance
    """
    da1 = _rename_duplicate_dims(da1)
    da2 = _rename_duplicate_dims(da2)

    diff = numpy.abs(da1 - da2).values.ravel()

    # skip entries where one field or both are a fill value
    diff = diff[numpy.isfinite(diff)]

    if diff.size == 0:
        # nothing left to compare; numpy.linalg.norm(..., ord=inf) would
        # raise a ValueError for an empty array
        l1_norm = l2_norm = linf_norm = 0.0
    else:
        l1_norm = numpy.linalg.norm(diff, ord=1)
        l2_norm = numpy.linalg.norm(diff, ord=2)
        linf_norm = numpy.linalg.norm(diff, ord=numpy.inf)

    if time_index is None:
        diff_str = ''
    else:
        diff_str = '{:d}: '.format(time_index)

    result = True
    checks = [('l1', l1_norm, max_l1_norm),
              ('l2', l2_norm, max_l2_norm),
              ('linf', linf_norm, max_linf_norm)]
    for label, norm, max_norm in checks:
        if max_norm is not None and max_norm < norm:
            result = False
        # the norm is reported whether or not it is being checked
        diff_str = '{} {}: {:16.14e} '.format(diff_str, label, norm)

    if not quiet or not result:
        print(diff_str)

    return result


def _compute_timers(base_directory, comparison_directory, timers):
    """
    Find timers and compute speedup between two run directories.

    Timers that are not found in both directories are skipped silently.

    Parameters
    ----------
    base_directory : str
        The run directory providing the reference timings

    comparison_directory : str
        The run directory whose timings are compared with the reference

    timers : list
        Names of the timers to compare
    """
    for timer in timers:
        timer1_found, timer1 = _find_timer_value(timer, base_directory)
        timer2_found, timer2 = _find_timer_value(timer, comparison_directory)

        if not (timer1_found and timer2_found):
            continue

        speedup = timer1 / timer2 if timer2 > 0. else 1.0

        if timer1 != 0.:
            percent = (timer2 - timer1) / timer1
        elif timer2 == timer1:
            percent = 0.0
        else:
            # the relative change from a zero base time is unbounded
            percent = float('inf')

        print("Comparing timer {}:".format(timer))
        print(" Base: {}".format(timer1))
        print(" Compare: {}".format(timer2))
        print(" Percent Change: {}%".format(percent * 100))
        print(" Speedup: {}".format(speedup))


def _find_timer_value(timer_name, directory):
    """
    Find a timer in the given directory.

    Files named ``log.*.out`` and ``timing.*`` are scanned in sorted order.
    The totals of all matching lines in the first file with at least one
    match are summed.  A line matches when its name field is a substring of
    ``timer_name`` (with spaces replaced by underscores) and its total field
    parses as a float.

    Parameters
    ----------
    timer_name : str
        The name of the timer

    directory : str
        The run directory to search

    Returns
    -------
    timer_found : bool
        Whether a matching line was found; ``False`` if ``directory`` does
        not exist

    timer : float
        The summed total time, 0.0 if the timer was not found
    """
    # Build a regular expression for any two characters with a space between
    # them.
    regex = re.compile(r'(\S) (\S)')
    sub_timer_name = timer_name.replace(' ', '_')

    timer = 0.0
    timer_found = False

    if not os.path.isdir(directory):
        # e.g. a step that was never run: report the timer as not found
        return timer_found, timer

    timer_line_size = 6
    # sorted so that the file providing the timer is deterministic
    for filename in sorted(os.listdir(directory)):
        if fnmatch.fnmatch(filename, "log.*.out"):
            # files written using built in MPAS timers
            name_index, total_index = 1, 2
        elif fnmatch.fnmatch(filename, "timing.*"):
            # files written using GPTL timers
            name_index, total_index = 0, 3
        else:
            continue

        with open(os.path.join(directory, filename), "r") as stats_file:
            for line in stats_file:
                # join words separated by a single space so that each
                # multi-word timer name becomes one field
                fields = regex.sub(r"\1_\2", line[2:]).split()
                if len(fields) < timer_line_size:
                    continue
                if fields[name_index] not in sub_timer_name:
                    continue
                try:
                    timer += float(fields[total_index])
                except ValueError:
                    # not a timer line after all (e.g. a header)
                    continue
                timer_found = True

        if timer_found:
            break

    return timer_found, timer


def _rename_duplicate_dims(da):
    """
    Give repeated dimensions of a DataArray unique names.

    The second, third, ... occurrence of a dimension ``dim`` is renamed to
    ``dim_2``, ``dim_3``, ...

    Parameters
    ----------
    da : xarray.DataArray
        The array to check

    Returns
    -------
    da : xarray.DataArray
        The input itself if no dimension is repeated, otherwise a new array
        built from only the values and the renamed dimensions
    """
    occurrences = {}
    new_dims = []
    for dim in da.dims:
        count = occurrences.get(dim, 0) + 1
        occurrences[dim] = count
        if count == 1:
            new_dims.append(dim)
        else:
            new_dims.append('{}_{}'.format(dim, count))

    if len(occurrences) == len(new_dims):
        # every dimension occurs exactly once
        return da

    return xarray.DataArray(data=da.values, dims=new_dims)