#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright (C) 2022, pysat development team
# Full license can be found in License.md
# -----------------------------------------------------------------------------
"""Routines to extract observational-style data from model output."""
import numpy as np
import re
import scipy.interpolate as interpolate
import pandas as pds
import pysat.utils as pyutils
import pysatModels as ps_mod
from pysatModels.utils.testing import compare_mod_name_coordinates
def instrument_altitude_to_model_pressure(inst, model, inst_name, mod_name,
                                          mod_datetime_name, mod_time_name,
                                          mod_units, inst_alt, mod_alt,
                                          mod_alt_units, scale=100.,
                                          inst_out_alt='model_altitude',
                                          inst_out_pres='model_pressure',
                                          tol=1.0):
    """Interpolate Instrument altitude values onto model pressure levels.

    Parameters
    ----------
    inst : pysat.Instrument
        Instrument object with observational data
    model : xarray.Dataset
        Model data in xarray format
    inst_name : array-like
        List of variable names containing the observational data coordinates
        at which the model data will be interpolated. Must be in the same order
        as mod_name.
    mod_name : array-like
        List of names of the coordinates to use for determining model
        locations. Must be in the same order as `mod_alt` is stored within
        xarray. The coordinates must make up a regular grid.
    mod_datetime_name : str
        Name of the data series in the model Dataset containing datetime info
    mod_time_name : str
        Name of the time coordinate in the model Dataset
    mod_units : list
        Units for each of the mod_name location attributes. Currently
        supports: rad/radian(s), deg/degree(s), h/hr(s)/hour(s), m, km, and cm
    inst_alt : str
        String identifier used in inst for the altitude variable
    mod_alt : str
        Variable identifier for altitude data in the model
        e.g. 'ZG' in standard TIEGCM files.
    mod_alt_units : str
        Units for the altitude variable. Currently supports: m, km, and cm
    scale : float
        Scalar used to roughly translate a change in altitude with a
        change in pressure level, the scale height. Same units as used by
        inst. (default=100.)
    inst_out_alt : str
        Label assigned to the model altitude data when attached to inst
        (default='model_altitude').
    inst_out_pres : str
        Label assigned to the model pressure level when attached to inst
        (default='model_pressure').
    tol : float
        Allowed difference between observed and modelled altitudes.
        Interpreted to have the same units as `inst_alt` (default=1.0).

    Returns
    -------
    [inst_out_alt, inst_out_pres] : list
        List of keys corresponding to the modelled data that was added to the
        instrument.

    Raises
    ------
    ValueError
        For incorrect input arguments

    Notes
    -----
    Uses an iterative regular grid interpolation to find the
    appropriate pressure level for the given input locations.

    """
    # Ensure the coordinate and data variable names are array-like
    inst_name = pyutils.listify(inst_name)
    mod_name = pyutils.listify(mod_name)

    # Test input
    if len(inst_name) == 0:
        estr = 'Must provide inst_name as a list of strings.'
        raise ValueError(estr)

    if len(mod_name) == 0:
        estr = 'Must provide mod_name as a list of strings.'
        raise ValueError(estr)

    if len(inst_name) != len(mod_name):
        estr = 'Must provide the same number of instrument and model '
        estr += 'location attribute names as a list'
        raise ValueError(estr)

    if len(mod_name) != len(mod_units):
        raise ValueError(''.join(['Must provide units for each model location',
                                  ' attribute']))

    if mod_time_name not in model.coords:
        raise ValueError("Unknown model time coordinate key name")

    if inst_alt not in inst.variables:
        raise ValueError("Unknown Instrument altitude key name")

    if mod_alt not in model.data_vars:
        raise ValueError("Unknown Model altitude key name")

    # Determine the scaling between model and instrument data
    inst_scale = np.ones(shape=len(inst_name), dtype=float)
    for i, iname in enumerate(inst_name):
        if iname not in inst.data.keys():
            raise ValueError(''.join(['Unknown instrument location index ',
                                      '{:}'.format(iname)]))

        # The altitude variable gets special treatment: the model altitude
        # grid is scaled separately (alt_scale) and the matching instrument
        # coordinate is the synthetic pressure-level index, which must not
        # be unit-scaled.
        if iname == inst_alt:
            alt_scale = pyutils.scale_units(
                mod_alt_units, inst.meta[iname, inst.meta.labels.units])
            inst_scale[i] = 1.
        else:
            inst_scale[i] = pyutils.scale_units(
                mod_units[i], inst.meta[iname, inst.meta.labels.units])

    # Pull out datetime data, preferring a data variable over a coordinate
    if mod_datetime_name in model.data_vars:
        mod_datetime = model.data_vars[mod_datetime_name]
        mod_datetime = mod_datetime.values.astype(np.int64)
    elif mod_datetime_name in model.coords:
        mod_datetime = model.coords[mod_datetime_name].values.astype(np.int64)
    else:
        raise ValueError("".join(["unknown model name for datetime: ",
                                  mod_datetime_name]))

    # Check input `mod_name` against actual data ordering for `mod_alt`
    compare_mod_name_coordinates(model[mod_alt], mod_name)

    # Create initial fake regular grid index in inst
    inst_model_coord = inst[inst_name[0]] * 0

    # We need to create an altitude index from the model. First, handle
    # model locations for interpolation. We use the dimensions associated
    # with model altitude in the order provided.
    # NOTE(review): the grid is divided by `inst_scale` here AND the
    # instrument coordinates are multiplied by `inst_scale` in the loop
    # below; verify this double application is intended for non-unity
    # scale factors.
    points = [model[dim].values / temp_scale
              for dim, temp_scale in zip(mod_name, inst_scale)]

    # Add time in first
    points.insert(0, mod_datetime)

    # Create interpolator that maps (time, coords...) -> log(altitude)
    interp = interpolate.RegularGridInterpolator(points,
                                                 np.log(model[mod_alt].values
                                                        / alt_scale),
                                                 bounds_error=False,
                                                 fill_value=None)

    # Use this interpolator to figure out what altitudes we are at.
    # Each loop, use the current estimated path through model expressed in
    # pressure and interp above to get the equivalent altitude of this path.
    # Compare this altitude to the actual instrument altitude and
    # shift the equivalent pressure for the instrument up/down
    # until difference between altitudes is small.

    # Log of instrument altitude
    log_ialt = np.log(inst[inst_alt])

    # Initial difference signal, guaranteed to exceed `tol` so the loop runs
    diff = log_ialt * 0 + 2.0 * tol
    while np.any(np.abs(diff) > tol):
        # Create input array using satellite time/position.
        # Replace the altitude coord with the fake pressure-level one.
        coords = []
        for iscale, coord in zip(inst_scale, inst_name):
            if coord == inst_alt:
                # Don't scale altitude-like model coordinate
                coords.append(inst_model_coord)
            else:
                # Scale other dimensions to the model
                coords.append(inst[coord] * iscale)

        # Use int64 to match the model datetime integers; platform-default
        # `int` may be 32-bit and overflow nanosecond epoch timestamps.
        coords.insert(0, inst.index.values.astype(np.int64))

        # To perform the interpolation we need points
        # like (x1, y1, z1), (x2, y2, z2)
        # but we currently have a list like
        # [(x1, x2, x3), (y1, y2, y3), ...].
        # To convert to the form we need the * below breaks each main list
        # out, and zip repeatedly outputs a tuple (xn, yn, zn).
        sat_pts = [inp for inp in zip(*coords)]

        # Altitude pulled out from model
        orbit_alt = interp(sat_pts)

        # Difference in altitude (linear units)
        diff = np.e ** orbit_alt - np.e ** log_ialt

        # Shift index in inst for model pressure level in the opposite
        # direction to diff, reduced in value by scale, the 'scale height'
        inst_model_coord -= diff / scale

    # Store achieved model altitude
    inst[inst_out_alt] = np.e ** orbit_alt
    notes_str = ''.join(('Interpolated Model altitude corresponding to `',
                         inst_out_pres, '` pressure level.',
                         inst.meta[inst_alt, inst.meta.labels.notes]))
    inst.meta[inst_out_alt] = {inst.meta.labels.units:
                               inst.meta[inst_alt, inst.meta.labels.units],
                               inst.meta.labels.notes: notes_str}

    # Add pressure level that goes with altitude to Instrument
    inst[inst_out_pres] = inst_model_coord
    notes_str = ''.join(('Interpolated Model pressure corresponding to `',
                         inst_out_alt, '` altitude level.'))

    # TODO(#95) add units from Model when that input is upgraded to be
    # a pysat Custom compatible method
    inst.meta[inst_out_pres] = {inst.meta.labels.notes: notes_str}

    return [inst_out_alt, inst_out_pres]
def instrument_view_through_model(inst, model, inst_name, mod_name,
                                  mod_datetime_name, mod_time_name,
                                  mod_units, sel_name=None,
                                  methods=None, model_label='model'):
    """Interpolate model values onto instrument locations.

    Parameters
    ----------
    inst : pysat.Instrument
        Instrument object with observational data
    model : xarray.Dataset
        Modelled data
    inst_name : array-like
        List of variable names containing the observational data coordinates
        at which the model data will be interpolated. Do not include 'time',
        only spatial coordinates.
    mod_name : array-like
        List of model dimension names used for organizing model data
        in the same order as `inst_name`. These must make up a regular grid.
        Do not include 'time', only spatial dimensions.
    mod_datetime_name : str
        Name of the data series in the model Dataset containing datetime info.
    mod_time_name : str
        Name of the time coordinate in the model Dataset.
    mod_units : list
        Units for each of the mod_name location dimensions. Currently
        supports: rad/radian(s), deg/degree(s), h/hr(s)/hour(s), m, km, and cm
    sel_name : array-like or NoneType
        List of names of modelled data indices to append to Instrument object,
        or None to append all modelled data. (default=None)
    methods : array-like or NoneType
        'linear' interpolation or 'nearest' neighbor options for RegularGrid.
        Must supply an option for each variable, or None to use ['linear'].
        (default=None)
    model_label : str
        Name of model, used to identify interpolated data values in instrument
        (default="model")

    Returns
    -------
    output_names : list
        Keys of modelled data added to the instrument

    Raises
    ------
    ValueError
        For incorrect input arguments

    Note
    ----
    Updates the inst Instrument with interpolated data from the model
    Instrument. The interpolation is performed via the
    RegularGridInterpolator for quick performance.

    This method may require the use of a pre-processor on coordinate
    dimensions to ensure that a regular interpolation may actually be
    performed.

    Models, such as TIEGCM, have a regular grid in pressure, not in altitude.
    To use this routine for TIEGCM please use
    instrument_altitude_to_model_pressure first to transform instrument
    altitudes to pressure levels suitable for this method.

    Variables that vary exponentially in height
    may be approximated by taking a log before interpolating, though
    this does also yield an exponential variation along the horizontal
    directions as well.

    Expects units strings to have the units as the first word, if a long
    description is provided (e.g., 'degrees', 'degrees North', or 'deg_N' and
    not 'geographic North degrees')

    See Also
    --------
    pysat.utils.scale_units

    """
    # Ensure the coordinate and data variable names are array-like. Use
    # None instead of a mutable default for `methods` (shared-list pitfall).
    inst_name = pyutils.listify(inst_name)
    mod_name = pyutils.listify(mod_name)
    methods = ['linear'] if methods is None else pyutils.listify(methods)

    # Interpolate over all variables if None provided
    if sel_name is None:
        sel_name = pyutils.listify(model.data_vars.keys())
    else:
        sel_name = pyutils.listify(sel_name)

    if len(methods) != len(sel_name):
        estr = ' '.join(('Must provide interpolation selection',
                         'for each variable via methods keyword.'))
        raise ValueError(estr)

    for method in methods:
        if method not in ['linear', 'nearest']:
            estr = ''.join(('Methods only supports "linear" or "nearest".',
                            ' Not ', method))
            raise ValueError(estr)

    # Test input
    if len(inst_name) == 0:
        estr = 'Must provide inst_name as a list of strings.'
        raise ValueError(estr)

    if len(mod_name) == 0:
        estr = 'Must provide mod_name as a list of strings.'
        raise ValueError(estr)

    if len(sel_name) == 0:
        estr = 'Must provide sel_name as a list of strings.'
        raise ValueError(estr)

    if len(inst_name) != len(mod_name):
        estr = 'Must provide the same number of instrument and model '
        estr += 'location attribute names as a list'
        raise ValueError(estr)

    if len(mod_name) != len(mod_units):
        raise ValueError(''.join(['Must provide units for each model location',
                                  ' attribute']))

    if mod_time_name not in model.coords:
        raise ValueError("Unknown model time coordinate key name")

    for name in sel_name:
        if name not in model:
            estr = ''.join((name, ' is not a valid model variable.'))
            raise ValueError(estr)

    # Pull out datetime data, preferring a data variable over a coordinate
    if mod_datetime_name in model.data_vars:
        mod_datetime = model.data_vars[mod_datetime_name]
        mod_datetime = mod_datetime.values.astype(np.int64)
    elif mod_datetime_name in model.coords:
        mod_datetime = model.coords[mod_datetime_name].values.astype(np.int64)
    else:
        raise ValueError("".join(["unknown model name for datetime: ",
                                  mod_datetime_name]))

    # Determine the scaling between model and instrument data
    inst_scale = np.ones(shape=len(inst_name), dtype=np.float64)
    for i, iname in enumerate(inst_name):
        if iname not in inst.data.keys():
            raise ValueError(''.join(['Unknown instrument location index ',
                                      '{:}'.format(iname)]))

        # Some units may have extra information (e.g., 'degrees North').
        # Use only the actual units in the scaling function. These are
        # assumed to come first.
        long_units = re.split(r"\W+|_",
                              inst.meta[iname, inst.meta.labels.units])[0]
        inst_scale[i] = pyutils.scale_units(mod_units[i], long_units)

    # Skip any variables that have already been interpolated onto inst
    del_list = list()
    keep_list = list()
    for mdat in sel_name:
        mname = '_'.join((model_label, mdat))
        if mname in inst.data.keys():
            ps_mod.logger.warning("".join(["model data already interpolated:",
                                           " {:}".format(mname)]))
            del_list.append(mdat)
        else:
            keep_list.append(mdat)

    if len(sel_name) == len(del_list):
        raise ValueError("instrument object already contains all model data")
    elif len(del_list) > 0:
        sel_name = keep_list

    # Check input `mod_name` against actual data ordering for `sel_name`
    for sname in sel_name:
        compare_mod_name_coordinates(model[sname], mod_name)

    # Create inst input based upon provided dimension names
    coords = [inst[coord_name] for coord_name in inst_name]

    # Time goes first
    coords.insert(0, inst.index.values.astype(np.int64))

    # Move from a list of lists [ [x1, x2, ...], [y1, y2, ...]]
    # to a list of tuples
    # [(x1, y1, ...), (x2, y2, ...)]
    # required for regulargrid interpolator
    inst_pts = [inp for inp in zip(*coords)]

    # Collect the model grid points together, scaled into instrument units
    # so they match the unscaled instrument query points above. This is
    # loop-invariant, so build it once. (Previously the computed
    # `inst_scale` was clobbered with ones inside the loop, so the unit
    # scaling never took effect.) Time dimension goes first.
    points = [mod_datetime]
    for iscale, var in zip(inst_scale, mod_name):
        points.append(model[var].values / iscale)

    # Perform the interpolation for each selected variable
    interp = {}
    output_names = []
    for label, method in zip(sel_name, methods):
        # Create the interpolator
        interp[label] = interpolate.RegularGridInterpolator(
            points, model[label].values, bounds_error=False, fill_value=None,
            method=method)

        # Apply it at observed locations and store result
        output_names.append('_'.join((model_label, label)))
        inst[output_names[-1]] = interp[label](inst_pts)

    return output_names
def interp_inst_w_irregular_model_coord(inst, model, inst_name, mod_name,
                                        mod_datetime_name, mod_units,
                                        mod_reg_dim, mod_irreg_var,
                                        mod_var_delta, sel_name=None,
                                        model_label='model'):
    """Interpolate irregular-coordinate model data onto Instrument path.

    Parameters
    ----------
    inst : pysat.Instrument
        pysat object that will receive interpolated data based upon position.
    model : pysat.Instrument
        Xarray pysat Instrument with model data that will be interpolated onto
        the `inst` locations.
    inst_name : list
        List of variable names containing the instrument data coordinates
        at which the model data will be interpolated. Do not include 'time',
        only spatial coordinates. Same ordering as used by mod_name.
    mod_name : list
        List of names of the data dimensions used to organize model data,
        in the same order as `inst_name`. These dimensions must make up a
        regular grid. Values from `mod_irreg_var` will be used to replace
        one of these regular dimensions, `mod_reg_dim`, with irregular values.
    mod_datetime_name : str
        Name of the data series in the model Dataset containing datetime info.
    mod_units : list
        Units for each of the `mod_name` dimensions. Users must provide units
        for `mod_irreg_var' instead of the units for `mod_reg_dim`.
        Currently supports: rad/radian(s), deg/degree(s), h/hr(s)/hour(s), m,
        km, and cm.
    mod_reg_dim : str
        Existing regular dimension name (must be in `mod_name`) used to
        organize model data that will be replaced with values from
        `mod_irreg_var` before performing interpolation.
    mod_irreg_var : str
        Variable name in model used to define the irregular grid value
        locations along `mod_reg_dim`. Must have same dimensions as
        `mod_name`.
    mod_var_delta : list
        List of delta values to be used when downselecting model values
        before interpolation, max(min(inst) - delta, min(model)) <= val <=
        min(max(inst) + delta, max(model)). Interpreted in the same order as
        `mod_name`.
    sel_name : list
        List of strings denoting model variable names that will be
        interpolated onto inst. The coordinate dimensions for these variables
        must correspond to those in `mod_irreg_var`.
    model_label : str
        Name of model, used to identify interpolated data values in instrument
        (default="model")

    Returns
    -------
    output_names : list
        Keys of interpolated model data added to the instrument

    Raises
    ------
    ValueError
        For incorrect input arguments

    Notes
    -----
    Expects units strings to have the units as the first word, if a long
    description is provided (e.g., 'degrees', 'degrees North', or 'deg_N' and
    not 'geographic North degrees').

    See Also
    --------
    pysat.utils.scale_units

    """
    # Ensure the inputs are array-like, matching the sibling routines
    inst_name = pyutils.listify(inst_name)
    mod_name = pyutils.listify(mod_name)
    mod_var_delta = pyutils.listify(mod_var_delta)

    # Interpolate over all variables if None provided
    if sel_name is None:
        sel_name = list(model.data_vars.keys())
    else:
        sel_name = pyutils.listify(sel_name)

    # Test input
    if len(inst_name) == 0:
        estr = 'Must provide inst_name as a list of strings.'
        raise ValueError(estr)

    if len(mod_name) == 0:
        estr = 'Must provide mod_name as a list of strings.'
        raise ValueError(estr)

    if len(sel_name) == 0:
        estr = 'Must provide sel_name as a list of strings.'
        raise ValueError(estr)

    if len(mod_var_delta) == 0:
        estr = 'Must provide mod_var_delta as a list of floats.'
        raise ValueError(estr)

    if len(mod_var_delta) != len(mod_name):
        estr = ' '.join(['Must provide the same number of model deltas',
                         'and dimension names.'])
        raise ValueError(estr)

    if len(inst_name) != len(mod_name):
        estr = ' '.join(['Must provide the same number of instrument and',
                         'model location attribute names as a list'])
        raise ValueError(estr)

    if len(mod_name) != len(mod_units):
        raise ValueError(''.join(['Must provide units for each model location',
                                  ' attribute']))

    # `mod_reg_dim` must be a member of `mod_name`; checking here avoids a
    # confusing NameError later when the irregular-variable scale is needed.
    if mod_reg_dim not in mod_name:
        raise ValueError('mod_reg_dim must be one of the mod_name dimensions.')

    # Ensure all variables in sel_name exist
    for iname in sel_name:
        if iname not in model:
            raise ValueError(''.join(['Unknown model variable index ',
                                      '{:}'.format(iname)]))

    # Ensure that `mod_irreg_var` exists
    if mod_irreg_var not in model:
        raise ValueError(''.join(['Unknown irregular model variable index ',
                                  '{:}'.format(mod_irreg_var)]))

    # Ensure coordinate dimensions match
    for var in sel_name:
        if model[var].dims != model[mod_irreg_var].dims:
            estr = ' '.join(('Coordinate dimensions must match for',
                             '"mod_irreg_var" and', model[var].name))
            raise ValueError(estr)

    # Ensure mod_reg_dim in mod_irreg_var
    if mod_reg_dim not in model[mod_irreg_var].dims:
        estr = 'mod_reg_dim must be a coordinate dimension for mod_irreg_var.'
        raise ValueError(estr)

    # Determine the scaling between model and instrument data
    inst_scale = np.ones(shape=len(inst_name), dtype=float)
    for i, iname in enumerate(inst_name):
        if iname not in inst.data.keys():
            raise ValueError(''.join(['Unknown instrument location index ',
                                      '{:}'.format(iname)]))

        # Some units may have extra information (e.g., 'degrees North').
        # Use only the actual units in the scaling function. These are
        # assumed to come first.
        # TODO(#95) for the `mod_reg_dim` entry, update this to use metadata
        # from `mod_irreg_var` when made a pysat compatible function.
        long_units = re.split(r"\W+|_",
                              inst.meta[iname, inst.meta.labels.units])[0]
        inst_scale[i] = pyutils.scale_units(mod_units[i], long_units)

        if mod_name[i] == mod_reg_dim:
            # Store scaling for mod_irreg_var
            dvar_scale = inst_scale[i]

    # Pull out datetime data, preferring a data variable over a coordinate
    if mod_datetime_name in model.data_vars:
        mod_datetime = model.data_vars[mod_datetime_name]
        mod_datetime = mod_datetime.values.astype(np.int64)
    elif mod_datetime_name in model.coords:
        mod_datetime = model.coords[mod_datetime_name].values.astype(np.int64)
    else:
        raise ValueError("".join(["unknown model name for datetime: ",
                                  mod_datetime_name]))

    # Check input `mod_name` against actual data ordering for `sel_name`
    for sname in sel_name:
        compare_mod_name_coordinates(model[sname], mod_name)

    # Perform same check against `mod_irreg_var`
    compare_mod_name_coordinates(model[mod_irreg_var], mod_name)

    # First, model locations for interpolation (regulargrid), scaled into
    # instrument units to match the unscaled instrument query points below.
    coords = [model[dim].values / temp_scale
              for dim, temp_scale in zip(mod_name, inst_scale)]

    # Time first
    coords.insert(0, mod_datetime)

    # Translate regular locations to equivalent irregular ones: pull out
    # irregular grid locations for variables that will be interpolated.
    dvar = model[mod_irreg_var]

    # Make a mesh of data coordinate location values. Start by getting the
    # total number of elements and find which dimension will be updated.
    num_pts = 1
    update_dim = -1000
    for i, dim in enumerate(dvar.dims):
        num_pts *= len(dvar.coords[dim])
        if dim == mod_reg_dim:
            update_dim = i

    # Array to store irregular locations of measurements. One column per
    # dimension (time + spatial); previously hard-coded to 4 columns, which
    # only worked for exactly three spatial dimensions.
    points = np.zeros((num_pts, len(coords)))

    # Create mesh corresponding to coordinate values and store
    pts = np.meshgrid(*coords, indexing='ij')
    for i, pt in enumerate(pts):
        points[:, i] = np.ravel(pt)

    # Replace existing regular dimension with irregular data. Model values
    # need to account for any change in units wrt the Instrument.
    points[:, update_dim] = np.ravel(dvar) / dvar_scale

    # Pull out model data to be interpolated. Needed so same downselection
    # procedure may be applied to data as to dimensions.
    model_vars = []
    for iname in sel_name:
        model_vars.append(np.ravel(model[iname].values))

    # Downselect all model points to those within some range of the
    # Instrument values. For each of the model dimensions, each with a
    # corresponding instrument variable, determine the selection criteria
    # and store the limits.
    for i, iname in enumerate(inst_name):
        # Range from Instrument values
        min_val = inst[iname].min()
        max_val = inst[iname].max()

        # Range from model values
        min_mval = np.nanmin(points[:, i + 1])
        max_mval = np.nanmax(points[:, i + 1])

        # Net range
        min_val = np.max([min_val - mod_var_delta[i], min_mval])
        max_val = np.min([max_val + mod_var_delta[i], max_mval])

        # Determine which points are within the specified tolerance range.
        idx, = np.where((points[:, i + 1] >= min_val)
                        & (points[:, i + 1] <= max_val))

        # Downselect model dimension values
        points = points[idx, :]

        # Perform downselection of model variables as well
        for j, ivar in enumerate(model_vars):
            model_vars[j] = ivar[idx]

    ps_mod.logger.debug('Remaining points after downselection {:d}'.format(
        len(idx)))

    # Create input array using inst time/position
    coords = [inst[coord] for coord in inst_name]
    coords.insert(0, inst.index.values.astype(np.int64))
    sat_pts = [inp for inp in zip(*coords)]

    # Perform interpolation of user desired variables
    output_names = []
    for var, idata in zip(sel_name, model_vars):
        ps_mod.logger.debug(''.join(['Creating interpolation object for ',
                                     var]))
        output_names.append('_'.join((model_label, var)))
        inst[output_names[-1]] = \
            interpolate.griddata(points, idata, sat_pts, rescale=True)
        ps_mod.logger.debug(' '.join([var, 'complete.']))

    return output_names