Source code for obsarray.templater.template_util

"""
Utilities for creating template xarray datasets
"""

from typing import Optional, Dict, List
from obsarray.templater.dataset_util import DatasetUtil
import xarray


# Module author and contact address
__author__ = "Sam Hunt <sam.hunt@npl.co.uk>"


def create_ds(
    template: Dict[str, Dict],
    size: Dict[str, int],
    metadata: Optional[Dict] = None,
    append_ds: Optional[xarray.Dataset] = None,
    propagate_ds: Optional[xarray.Dataset] = None,
) -> xarray.Dataset:
    """
    Build a template dataset from a variable-definition dictionary

    :param template: dictionary defining the dataset variable structure, as described below.
    :param size: dictionary of dataset dimensions, one entry per dimension, with the dimension
        size as an int value
    :param metadata: (optional) dictionary of dataset metadata, merged into the dataset attributes
    :param append_ds: (optional) base dataset to which the template variables are added. It is
        used directly rather than copied; when omitted a new empty dataset is created.
    :param propagate_ds: (optional) dataset whose data is copied into the template dataset for
        variables with common names and dimensions. Useful for transferring common data between
        datasets at different processing levels (e.g. times, etc.).

    :returns: template dataset

    Each key/value pair of the ``template`` dictionary defines one variable: the key is the
    variable name and the value is a dictionary with the following entries:

    * ``"dtype"`` (*np.typecodes*/*str*) - variable data type, either a numpy data type or special
      value ``"flag"`` for flag variable
    * ``"dim"`` (*list*) - list of variable dimension names
    * ``"attributes"`` (*dict*) - (optional) variable attributes
    * ``"encoding"`` (*dict*) - (optional) variable encoding.

    For more information on the required form of these entries, see the
    :ref:`variables definition section <variables dict>` of the user guide.
    """

    # Start from the caller's dataset when one is supplied, otherwise from an empty one
    if append_ds is None:
        template_ds = xarray.Dataset()
    else:
        template_ds = append_ds

    # Template variables are always added
    template_ds = TemplateUtil.add_variables(template_ds, template, size)

    # Dataset-level metadata is optional
    if metadata is not None:
        template_ds = TemplateUtil.add_metadata(template_ds, metadata)

    # Optionally copy across data for variables shared with propagate_ds (done in place)
    if propagate_ds is not None:
        TemplateUtil.propagate_values(template_ds, propagate_ds)

    return template_ds


def create_var(var_name: str, var_attrs: Dict, size: Dict[str, int]) -> xarray.Variable:
    """
    Returns template variable

    Thin module-level wrapper around :meth:`TemplateUtil._create_var`.

    :param var_name: variable name
    :param var_attrs: variable definition dictionary (as an entry to a template dictionary)
    :param size: dictionary of dataset dimensions, entry per dataset dimension with value of size
        as int

    :returns: template variable
    """

    return TemplateUtil._create_var(var_name, var_attrs, size)


class TemplateUtil:
    """
    Class to create template xarray datasets
    """

    @staticmethod
    def add_variables(
        ds: xarray.Dataset, template: Dict[str, Dict], size: Dict[str, int]
    ) -> xarray.Dataset:
        """
        Adds defined variables to dataset

        :param ds: dataset (updated in place)
        :param template: dictionary defining variables, see the
            :ref:`variables definition section <variables dict>` of the user guide for more
            information.
        :param size: dictionary of dataset dimensions, entry per dataset dimension with value of
            size as int

        :returns: dataset with defined variables
        """

        for var_name, var_attrs in template.items():
            ds[var_name] = TemplateUtil._create_var(var_name, var_attrs, size)

        return ds

    @staticmethod
    def _create_var(
        var_name: str, var_attrs: dict, size: Dict[str, int]
    ) -> xarray.Variable:
        """
        Returns variable built from a single template entry

        :param var_name: variable name
        :param var_attrs: variable definition dictionary, with required entries ``"dtype"`` and
            ``"dim"`` and optional entries ``"attributes"`` and ``"encoding"``. This dictionary is
            not modified.
        :param size: dictionary of dataset dimensions, entry per dataset dimension with value of
            size as int

        :returns: template variable

        :raises TypeError: if ``var_name`` is not a string
        :raises KeyError: if a dimension of the variable is missing from ``size``, or a ``"flag"``
            variable is defined without a ``"flag_meanings"`` attribute
        :raises ValueError: if ``"dtype"`` is a string other than ``"flag"``
        """

        du = DatasetUtil()

        # Check variable definition
        TemplateUtil._check_variable_definition(var_name, var_attrs)

        # Unpack variable attributes
        dtype = var_attrs["dtype"]
        dim_names = var_attrs["dim"]

        # Work on a copy of the attributes - entries are popped from it below, and popping from
        # the caller's dictionary would corrupt the template for any later reuse
        attributes = var_attrs.get("attributes")
        if attributes is not None:
            attributes = dict(attributes)

        # "err_corr" is not a plain attribute, it selects creation of an uncertainty variable
        err_corr = None
        if attributes is not None:
            err_corr = attributes.pop("err_corr", None)

        # Determine variable shape from dims
        try:
            dim_sizes = TemplateUtil._return_variable_shape(dim_names, size)
        except KeyError as err:
            raise KeyError(
                "Dim Name Error - Variable "
                + var_name
                + " defined with dim not in dim_sizes_dict"
            ) from err

        # Create variable
        if isinstance(dtype, str):
            # The only supported string dtype is the special value "flag"
            if dtype != "flag":
                raise ValueError("unknown dtype - " + dtype)

            if attributes is None or "flag_meanings" not in attributes:
                raise KeyError(
                    "Flag variable "
                    + var_name
                    + " defined without 'flag_meanings' attribute"
                )

            flag_meanings = attributes.pop("flag_meanings")
            variable = du.create_flags_variable(
                dim_sizes,
                meanings=flag_meanings,
                dim_names=dim_names,
                attributes=attributes,
            )

        elif err_corr is None:
            variable = du.create_variable(
                dim_sizes, dim_names=dim_names, dtype=dtype, attributes=attributes
            )

        else:
            variable = du.create_unc_variable(
                dim_sizes,
                dim_names=dim_names,
                dtype=dtype,
                attributes=attributes,
                err_corr=err_corr,
            )

        if "encoding" in var_attrs:
            du.add_encoding(variable, **var_attrs["encoding"])

        # Drop the "_FillValue" attribute if the variable carries one.
        # NOTE(review): block nesting was reconstructed from a flattened listing - confirm this
        # removal is meant to apply to every variable and not only when an encoding is defined.
        variable.attrs.pop("_FillValue", None)

        return variable

    @staticmethod
    def _check_variable_definition(variable_name: str, variable_attrs: Dict):
        """
        Checks validity of variable definition, raising errors as appropriate

        :param variable_name: variable name
        :param variable_attrs: variable defining dictionary (currently not inspected)

        :raises TypeError: if ``variable_name`` is not a string
        """

        # Variable name must be type str
        if not isinstance(variable_name, str):
            raise TypeError(
                "Invalid variable name: " + str(variable_name) + " (must be string)"
            )

        # todo - add more tests to check validity of variable definition

    @staticmethod
    def _return_variable_shape(dim_names: List[str], size: Dict[str, int]) -> List[int]:
        """
        Returns dimension sizes of specified dimensions

        :param dim_names: dimension names
        :param size: dictionary of dataset dimensions, entry per dataset dimension with value of
            size as int

        :returns: dimension sizes, in the order of ``dim_names``

        :raises KeyError: if a name in ``dim_names`` is not a key of ``size``
        """

        return [size[dim_name] for dim_name in dim_names]

    @staticmethod
    def add_metadata(ds: xarray.Dataset, metadata: Dict) -> xarray.Dataset:
        """
        Adds metadata to dataset

        :param ds: dataset (its attributes are updated in place)
        :param metadata: dictionary of dataset metadata

        :returns: dataset with updated metadata
        """

        ds.attrs.update(metadata)

        return ds

    @staticmethod
    def propagate_values(
        target_ds: xarray.Dataset,
        source_ds: xarray.Dataset,
        exclude: Optional[List[str]] = None,
    ) -> None:
        """
        Populates target_ds in-place with data from source_ds for their variables with common
        names and dimensions. Useful for transferring common data between datasets at different
        processing levels (e.g. times, etc.).

        N.B. propagates data only, not variables as a whole with attributes etc.

        :param target_ds: ds to populate (perhaps data at new processing level)
        :param source_ds: ds to take data from (perhaps data at previous processing level)
        :param exclude: (optional) names of common variables that must not be propagated
        """

        # Variable names common to target_ds and source_ds
        common_variable_names = set(target_ds).intersection(source_ds)

        for name in common_variable_names:

            # Skip variables the caller asked to leave untouched
            if exclude is not None and name in exclude:
                continue

            target_var = target_ds[name]
            source_var = source_ds[name]

            # Only propagate between variables with identical dimensions and shape
            if target_var.dims != source_var.dims:
                continue
            if target_var.shape != source_var.shape:
                continue

            target_ds[name].values = source_var.values

        # to do - add method to propagate common unpopulated metadata


if __name__ == "__main__":
    pass