Commit a12d54d7 authored by BorjaEst
Browse files

Integration of metadata.yaml output when skimming

parent 72808c10
......@@ -41,7 +41,7 @@ if __name__ == '__main__':
# Create sources"Loading data from './data' ")
ds = {name: o3skim.Source(name, collection) for
ds = {name: o3skim.Source(name, **collection) for
name, collection in config.items()}
# Skim output
......@@ -36,6 +36,14 @@ class ModelAccessor:
return None
def metadata(self):
    """Return the dataset attributes merged with per-variable attributes.

    The result starts from the dataset level attributes and adds one
    entry per data variable, keyed by the variable name, holding that
    variable's own attributes.

    :return: Dataset attributes plus ``{variable name: attributes}``.
    :rtype: dict
    """
    # Copy so the returned dict never aliases the dataset's own attrs.
    result = dict(self._model.attrs)
    # Iterate the variable names directly; ``Dataset.var()`` is the
    # variance reduction and was only used here to obtain those names.
    for var in self._model.data_vars:
        result[var] = self._model[var].attrs
    return result
def groupby_year(self):
"""Returns a grouped dataset by year"""
logger.debug("Performing group by year on model")
......@@ -115,6 +123,14 @@ class Tests(unittest.TestCase):
expected = Tests.vmro3_datarray().to_dataset(name="vmro3_zm")
xr.testing.assert_equal(self.ds.model.vmro3, expected)
def test_metadata_property(self):
    """Check the dataset and per-variable descriptions in the metadata."""
    metadata = self.ds.model.metadata
    self.assertEqual(metadata["description"], "Test dataset")
    # NOTE(review): the variable keys below were reconstructed from the
    # accessor, which stores variable attrs under the variable name;
    # confirm "tco3_zm" / "vmro3_zm" against the test dataset fixture.
    self.assertEqual(
        metadata["tco3_zm"]["description"], "Test tco3 datarray")
    self.assertEqual(
        metadata["vmro3_zm"]["description"], "Test vmro3 datarray")
def test_groupby_year(self):
groups = self.ds.model.groupby_year()
self.assertEqual(25, len(groups))
......@@ -32,21 +32,25 @@ class Source:
The current supported model variables are "tco3_zm" and "vmro3_zm",
which should contain the information on how to retrieve the data
from the netCDF collection.
:param name: Name to provide to the source.
:type name: str
:param metadata: Source metadata, defaults to {}.
:type metadata: dict, optional
:param collections: Dictionary where each 'key' is a model name
and its value another dictionary with the variable loading
statements for that model.
{name:str, paths: str, coordinates: dict}
:type collections: dict
:type collections: **dict
def __init__(self, name, collections): = name
def __init__(self, name, metadata={}, **collections):
self._name = name
self._metadata = metadata
self._models = {}"Loading source '%s'","Loading source '%s'", name)
for name, specifications in collections.items():"Loading model '%s'", name)
model = _load_model(**specifications)
......@@ -56,10 +60,18 @@ class Source:
def __getitem__(self, model_name):
    """Return the loaded model stored under ``model_name``."""
    loaded_models = self._models
    return loaded_models[model_name]
def name(self):
    """Return the name stored for this source."""
    source_name = self._name
    return source_name
def models(self):
    """Return the names of the stored models as a new list."""
    return [model_name for model_name in self._models.keys()]
def metadata(self):
    """Return the metadata stored for this source."""
    source_metadata = self._metadata
    return source_metadata
def skim(self, groupby=None):
"""Request to skim all source data into the current folder.
......@@ -72,17 +84,37 @@ class Source:
:type groupby: str, optional
for model in self._models:
dirname = "{source}_{model}".format(, model=model)
dirname = "{}_{}".format(self._name, model)
os.makedirs(dirname, exist_ok=True)"Skimming data from '%s'", dirname)
_skim(self[model], delta=groupby)
source_metadata = self.metadata
model_metadata = self[model].model.metadata
metadata = {**source_metadata, **model_metadata}
_skim(self[model], delta=groupby, metadata=metadata)
@utils.return_on_failure("Error when loading model", default=None)
def _load_model(tco3_zm=None, vmro3_zm=None):
"""Loads a model merging standardized data from specified datasets."""
dataset = xr.Dataset()
def _load_model(tco3_zm=None, vmro3_zm=None, metadata={}):
"""Loads a model merging standardized data from specified datasets.
:param tco3_zm: tco3 variable description, defaults to None.
:type tco3_zm: {name:str, paths:str,
coordinates:{lat:str, lon:str, time:str}},
:param vmro3_zm: vmro3 variable description, defaults to None.
:type vmro3_zm: {name:str, paths:str,
coordinates:{lat:str, lon:str, plev:str, time:str}},
:param metadata: Source metadata, defaults to {}.
:type metadata: dict, optional
:return: Dataset with specified variables.
:rtype: xarray.Dataset
dataset = xr.Dataset(attrs=metadata)
if tco3_zm:
logger.debug("Loading tco3_zm into model")
with xr.open_mfdataset(tco3_zm['paths']) as load:
......@@ -91,6 +123,7 @@ def _load_model(tco3_zm=None, vmro3_zm=None):
dataset = dataset.merge(standardized)
dataset.tco3_zm.attrs = tco3_zm.get('metadata', {})
if vmro3_zm:
logger.debug("Loading vmro3_zm into model")
with xr.open_mfdataset(vmro3_zm['paths']) as load:
......@@ -99,11 +132,25 @@ def _load_model(tco3_zm=None, vmro3_zm=None):
dataset = dataset.merge(standardized)
dataset.vmro3_zm.attrs = vmro3_zm.get('metadata', {})
return dataset
def _skim(model, delta=None):
"""Skims model producing reduced dataset files"""
def _skim(model, delta=None, metadata=None):
"""Skims model producing reduced dataset files. It is possible to
indicate the time to split the output by 'delta'. If metadata is
introduced in the form of dict, a 'metadata.yaml' file is
generated together with the skimmed output.
:param model: Dataset with ModelAccessor to skim.
:type model: xarray.Dataset
:param metadata: Model metadata to save as yaml, defaults to None.
:type metadata: dict, optional
:param delta: How to group output (None, 'year', 'decade').
:type delta: str, optional
logger.debug("Skimming model with delta {}".format(delta))
skimmed = model.model.skim()
if delta == 'year':
......@@ -131,6 +178,9 @@ def _skim(model, delta=None):
datasets=[ds.model.vmro3 for ds in datasets],
paths=[vmro3_path(year) for year in years]
if metadata:
logger.debug("Creating metadata.yaml file")
utils.save("metadata.yaml", metadata=metadata)
class TestsSource(unittest.TestCase):
......@@ -74,3 +74,17 @@ def load(yaml_file):
config = yaml.safe_load(ymlfile)
logging.debug("Configuration data: %s", config)
return config
def save(file_name, metadata):
    """Save the metadata dict as a yaml file in the current folder.

    :param file_name: Name for the output yaml file.
    :type file_name: str
    :param metadata: Dict with the data to save into the file.
    :type metadata: dict
    """
    # allow_unicode=True makes yaml emit non-ASCII characters verbatim, so
    # the file encoding is pinned to UTF-8 instead of the platform default.
    # Plain 'w' is enough: the file is only written, never read back.
    with open(file_name, 'w', encoding='utf-8') as ymlfile:
        yaml.dump(metadata, ymlfile, allow_unicode=True)
......@@ -71,7 +71,7 @@ def source_name(request):
def source(config_dict, source_name, data_dir):
source = o3skim.Source(source_name, config_dict[source_name])
source = o3skim.Source(source_name, **config_dict[source_name])
return source
......@@ -14,7 +14,7 @@ class TestSource:
def test_constructor(self, config_dict, source_name, data_dir):
source = o3skim.Source(source_name, config_dict[source_name])
source = o3skim.Source(source_name, **config_dict[source_name])
assert type( is str
assert type(source.models) is list
assert source.models != []
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment