Commit 7e2909b8 authored by BorjaEst
Browse files

New module standardization

parent 742a95b0
......@@ -13,7 +13,10 @@ import xarray as xr
import os.path
import datetime
import logging
from . import utils
from o3skim import utils
from o3skim import standardization
logger = logging.getLogger('o3skim.sources')
......@@ -40,7 +43,7 @@ class Source:
logging.info("Load model '%s'", name)
self._models[name] = Model(variables)
def skim(self, groupby=None):
def skim(self, groupby=None, **kwargs):
"""Request to skim all source data into the current folder
:param groupby: How to group output (None, year, decade).
......@@ -50,10 +53,8 @@ class Source:
dirname = self._name + "_" + name
os.makedirs(dirname, exist_ok=True)
logger.info("Skim data from '%s'", dirname)
model.to_netcdf(dirname, groupby)
model.to_netcdf(dirname, groupby, **kwargs)
xr.Dataset.__init__
class Model(xr.Dataset):
"""Conceptual class for model with variables. It is produced by the
......@@ -65,18 +66,15 @@ class Model(xr.Dataset):
:type variables: dict
"""
def __init__(self, variables):
def __init__(self, specifications):
ds = xr.Dataset()
if 'tco3_zm' in variables:
logger.debug("Load 'tco3_zm' data")
ds = ds.merge(get_tco3_zm(**variables))
if 'vmro3_zm' in variables:
logger.debug("Load 'vmro3_zm' data")
ds = ds.merge(get_vmro3_zm(**variables))
for variable in specifications:
load = standardization.load(variable, specifications[variable])
ds = ds.merge(load)
# Containment
self.dataset = ds
def __getattr__(self,attr):
def __getattr__(self, attr):
# Delegation
return getattr(self.dataset, attr)
......@@ -95,7 +93,7 @@ class Model(xr.Dataset):
"""
if delta == None:
return "", self
return [("", self)]
logger.debug("Group model by: '{0}' ".format(delta))
if delta == 'year':
......@@ -117,10 +115,10 @@ class Model(xr.Dataset):
var_models = []
for var in self.data_vars:
logger.debug("Internal var: '{0}' to dataset".format(var))
var_models.extend((var, self[var].to_dataset()))
var_models.append((var, self[var].to_dataset()))
return var_models
def to_netcdf(self, path, *arg, delta=None, **kwargs):
def to_netcdf(self, path, delta=None, **kwargs):
"""Request to save model data into the specified path
:param path: Path where to place the output files.
......@@ -137,38 +135,11 @@ class Model(xr.Dataset):
paths = []
for t_range, ds1 in self.groupby(delta=delta):
for var, ds2 in ds1.split_variables():
datasets.extend(ds2)
datasets.append(ds2)
if t_range == "":
paths.extend(path + "/" + var + ".nc")
paths.append(path + "/" + var + ".nc")
else:
paths.extend(path + "/" + var + "_" + t_range + ".nc")
paths.append(path + "/" + var + "_" + t_range + ".nc")
logging.info("Save dataset into: %s", paths)
xr.save_mfdataset(datasets, paths, *arg, **kwargs)
@utils.return_on_failure("Error when loading 'tco3_zm'")
def get_tco3_zm(tco3_zm, **kwarg):
"""Gets and standarises the tco3_zm data"""
with xr.open_mfdataset(tco3_zm['paths']) as dataset:
dataset = dataset.rename({
tco3_zm['name']: 'tco3_zm',
tco3_zm['coordinates']['time']: 'time',
tco3_zm['coordinates']['lat']: 'lat',
tco3_zm['coordinates']['lon']: 'lon'
})['tco3_zm'].to_dataset()
return dataset.mean(dim='lon')
@utils.return_on_failure("Error when loading 'vmro3_zm'")
def get_vmro3_zm(vmro3_zm, **kwarg):
"""Gets and standarises the vmro3_zm data"""
with xr.open_mfdataset(vmro3_zm['paths']) as dataset:
dataset = dataset.rename({
vmro3_zm['name']: 'vmro3_zm',
vmro3_zm['coordinates']['time']: 'time',
vmro3_zm['coordinates']['plev']: 'plev',
vmro3_zm['coordinates']['lat']: 'lat',
vmro3_zm['coordinates']['lon']: 'lon'
})['vmro3_zm'].to_dataset()
return dataset.mean(dim='lon')
xr.save_mfdataset(datasets, paths, **kwargs)
"""Module in charge of dataset standardization when loading models."""
import logging
import xarray as xr
from o3skim import utils
logger = logging.getLogger('o3skim.standardization')
# Standard layout for total column ozone (tco3): the name the variable is
# renamed to, the dimension that gets averaged out, and the coordinate names
# every source must be mapped onto.
tco3_standard_name = 'tco3_zm'
tco3_mean_coordinate = 'lon'
tco3_standard_coordinates = ['time', 'lat', 'lon']

# Standard layout for the ozone volume mixing ratio (vmro3); same fields as
# above, with the additional 'plev' coordinate.
vmro3_standard_name = 'vmro3_zm'
vmro3_mean_coordinate = 'lon'
vmro3_standard_coordinates = ['time', 'plev', 'lat', 'lon']
@utils.return_on_failure("Error when loading '{0}'".format(tco3_standard_name))
def __load_tco3(name, paths, coordinates):
    """Load the tco3 data and bring it to the standard layout.

    The variable and its coordinates are renamed to the standard names,
    every other variable is dropped, and the result is averaged over
    ``tco3_mean_coordinate``.

    :param name: Name of the variable as found in the source files.
    :param paths: Forwarded unchanged to ``xr.open_mfdataset``.
    :param coordinates: Mapping from each entry of
        ``tco3_standard_coordinates`` to the coordinate name used in the
        source files.
    :return: Dataset with the single standard variable, averaged over
        ``tco3_mean_coordinate``.

    NOTE(review): failures are handled by ``utils.return_on_failure``; what
    is returned in that case depends on that helper - confirm there.
    """
    logger.debug("Standard loading of '{0}' data".format(tco3_standard_name))
    with xr.open_mfdataset(paths) as dataset:
        # Variable rename first, coordinate renames after: on a key clash
        # the coordinate entry wins.
        renames = {name: tco3_standard_name}
        renames.update(
            (coordinates[standard], standard)
            for standard in tco3_standard_coordinates
        )
        renamed = dataset.rename(renames)
        # Keep only the standard variable, still wrapped as a Dataset.
        selected = renamed[tco3_standard_name].to_dataset()
        return selected.mean(dim=tco3_mean_coordinate)
@utils.return_on_failure("Error when loading '{0}'".format(vmro3_standard_name))
def __load_vmro3(name, paths, coordinates):
    """Load the vmro3 data and bring it to the standard layout.

    The variable and its coordinates are renamed to the standard names,
    every other variable is dropped, and the result is averaged over
    ``vmro3_mean_coordinate``.

    :param name: Name of the variable as found in the source files.
    :param paths: Forwarded unchanged to ``xr.open_mfdataset``.
    :param coordinates: Mapping from each entry of
        ``vmro3_standard_coordinates`` to the coordinate name used in the
        source files.
    :return: Dataset with the single standard variable, averaged over
        ``vmro3_mean_coordinate``.

    NOTE(review): failures are handled by ``utils.return_on_failure``; what
    is returned in that case depends on that helper - confirm there.
    """
    logger.debug("Standard loading of '{0}' data".format(vmro3_standard_name))
    with xr.open_mfdataset(paths) as dataset:
        # Variable rename first, coordinate renames after: on a key clash
        # the coordinate entry wins.
        renames = {name: vmro3_standard_name}
        renames.update(
            (coordinates[standard], standard)
            for standard in vmro3_standard_coordinates
        )
        renamed = dataset.rename(renames)
        # Keep only the standard variable, still wrapped as a Dataset.
        selected = renamed[vmro3_standard_name].to_dataset()
        return selected.mean(dim=vmro3_mean_coordinate)
# Dispatch table: standard variable name -> function that loads and
# standardizes that variable. `load` looks requested variables up here.
__loads = {
    tco3_standard_name: __load_tco3,
    vmro3_standard_name: __load_vmro3,
}
# Non existing variable exception
class UnknownVariable(Exception):
    """Raised when a variable has no registered standardization loader.

    :param variable: The variable that could not be resolved; stored on
        ``self.variable``.
    :param message: Description of the problem; stored on ``self.message``
        and passed to ``Exception`` as its only argument.
    """

    def __init__(self, variable, message="Unknown variable"):
        self.variable = variable
        self.message = message
        super().__init__(self.message)

    def __str__(self):
        # Name the offending variable so logs and tracebacks are actionable;
        # ``args`` and ``message`` are left untouched for existing callers.
        return "{0}: {1!r}".format(self.message, self.variable)
def load(variable, configuration):
    """Load and standardize a variable using a specific configuration.

    The loader registered for ``variable`` in the module dispatch table is
    called with ``configuration`` unpacked as keyword arguments.

    :param variable: Loadable variable (a key of the dispatch table).
    :type variable: str
    :param configuration: Configuration to apply standardization; its keys
        must match the loader's parameters (``name``, ``paths``,
        ``coordinates`` for the loaders defined in this module).
    :type configuration: dict
    :return: Whatever the selected loader returns. The loaders in this
        module return the standardized dataset; they are wrapped by
        ``utils.return_on_failure``, so the value on failure depends on
        that helper.
    :raises UnknownVariable: If no loader is registered for ``variable``.
    """
    try:
        function = __loads[variable]
    except KeyError:
        # The KeyError is a lookup detail; hide it from the traceback.
        raise UnknownVariable(variable) from None
    return function(**configuration)
"""Unittest module template.
"""
"""Unittest module template."""
import os
import shutil
......@@ -85,22 +83,15 @@ class TestO3SKIM_sources(unittest.TestCase):
name, collection in self.config_base.items()}
# CCMI-1 tco3_zm asserts
self.assertTrue(
'time' in ds['CCMI-1']._models['IPSL']['tco3_zm'].coords)
self.assertTrue(
'lat' in ds['CCMI-1']._models['IPSL']['tco3_zm'].coords)
self.assertFalse(
'lon' in ds['CCMI-1']._models['IPSL']['tco3_zm'].coords)
self.assertTrue( 'time' in ds['CCMI-1']._models['IPSL']['tco3_zm'].coords)
self.assertTrue( 'lat' in ds['CCMI-1']._models['IPSL']['tco3_zm'].coords)
self.assertFalse( 'lon' in ds['CCMI-1']._models['IPSL']['tco3_zm'].coords)
# CCMI-1 vmro3_zm asserts
self.assertTrue(
'time' in ds['CCMI-1']._models['IPSL']['vmro3_zm'].coords)
self.assertTrue(
'plev' in ds['CCMI-1']._models['IPSL']['vmro3_zm'].coords)
self.assertTrue(
'lat' in ds['CCMI-1']._models['IPSL']['vmro3_zm'].coords)
self.assertFalse(
'lon' in ds['CCMI-1']._models['IPSL']['vmro3_zm'].coords)
self.assertTrue( 'time' in ds['CCMI-1']._models['IPSL']['vmro3_zm'].coords)
self.assertTrue( 'plev' in ds['CCMI-1']._models['IPSL']['vmro3_zm'].coords)
self.assertTrue( 'lat' in ds['CCMI-1']._models['IPSL']['vmro3_zm'].coords)
self.assertFalse( 'lon' in ds['CCMI-1']._models['IPSL']['vmro3_zm'].coords)
# Checks the original data has not been modified
self.assert_with_backup()
......@@ -142,15 +133,15 @@ class TestO3SKIM_sources(unittest.TestCase):
# CCMI-1 data skim asserts
self.assertTrue(os.path.isdir("output/CCMI-1_IPSL"))
self.assertTrue(os.path.exists("output/CCMI-1_IPSL/tco3_zm_2000.nc"))
self.assertTrue(os.path.exists("output/CCMI-1_IPSL/vmro3_zm_2000.nc"))
self.assertTrue(os.path.exists("output/CCMI-1_IPSL/tco3_zm_2000-2001.nc"))
self.assertTrue(os.path.exists("output/CCMI-1_IPSL/vmro3_zm_2000-2001.nc"))
# ECMWF data skim asserts
self.assertTrue(os.path.isdir("output/ECMWF_ERA-5"))
self.assertTrue(os.path.exists("output/ECMWF_ERA-5/tco3_zm_2000.nc"))
self.assertTrue(os.path.exists("output/ECMWF_ERA-5/tco3_zm_2000-2001.nc"))
self.assertTrue(os.path.isdir("output/ECMWF_ERA-i"))
self.assertTrue(os.path.exists("output/ECMWF_ERA-i/tco3_zm_2000.nc"))
self.assertTrue(os.path.exists("output/ECMWF_ERA-i/vmro3_zm_2000.nc"))
self.assertTrue(os.path.exists("output/ECMWF_ERA-i/tco3_zm_2000-2001.nc"))
self.assertTrue(os.path.exists("output/ECMWF_ERA-i/vmro3_zm_2000-2001.nc"))
# Checks the original data has not been modified
self.assert_with_backup()
......@@ -168,12 +159,9 @@ class TestO3SKIM_sources(unittest.TestCase):
# ECMWF data skim asserts
self.assertTrue(os.path.isdir("output/ErrorModels_correct_variable"))
self.assertTrue(os.path.exists(
"output/ErrorModels_correct_variable/vmro3_zm.nc"))
self.assertTrue(os.path.isdir(
"output/ErrorModels_non_existing_variable"))
self.assertTrue(
len(os.listdir("output/ErrorModels_non_existing_variable")) == 0)
self.assertTrue(os.path.exists( "output/ErrorModels_correct_variable/vmro3_zm.nc"))
self.assertTrue(os.path.isdir( "output/ErrorModels_non_existing_variable"))
self.assertTrue( len(os.listdir("output/ErrorModels_non_existing_variable")) == 0)
# self.assertTrue(os.path.isdir("output/ECMWF_wrong_coordinates"))
# self.assertTrue(len(os.listdir("output/ECMWF_wrong_coordinates")) == 0)
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment