Commit 3d6d34a5 authored by BorjaEst's avatar BorjaEst
Browse files

Merge remote-tracking branch 'scclab/22-other-nc-files-in-folder-conflict-the-loading'

parents ac73a291 27662354
...@@ -21,10 +21,10 @@ class Source: ...@@ -21,10 +21,10 @@ class Source:
def skim(self): def skim(self):
for name, model in self._models.items(): for name, model in self._models.items():
path = self._name + "_" + name dirname = self._name + "_" + name
os.makedirs(path, exist_ok=True) os.makedirs(dirname, exist_ok=True)
logger.info("Skim data from '%s'", path) logger.info("Skim data from '%s'", dirname)
model.skim(path) model.skim(dirname)
class Model: class Model:
...@@ -38,19 +38,18 @@ class Model: ...@@ -38,19 +38,18 @@ class Model:
logger.debug("Load 'vmro3_zm' data") logger.debug("Load 'vmro3_zm' data")
self.__get_vmro3_zm(**variables) self.__get_vmro3_zm(**variables)
def skim(self, path): def skim(self, dirname):
if hasattr(self, '_tco3_zm'): if hasattr(self, '_tco3_zm'):
logger.debug("Skim 'tco3_zm' data") logger.debug("Skim 'tco3_zm' data")
utils.to_netcdf(path, "tco3_zm", self._tco3_zm) utils.to_netcdf(dirname, "tco3_zm", self._tco3_zm)
if hasattr(self, '_vmro3_zm'): if hasattr(self, '_vmro3_zm'):
logger.debug("Skim 'vmro3_zm' data") logger.debug("Skim 'vmro3_zm' data")
utils.to_netcdf(path, "vmro3_zm", self._vmro3_zm) utils.to_netcdf(dirname, "vmro3_zm", self._vmro3_zm)
@utils.return_on_failure("Error when loading 'tco3_zm'") @utils.return_on_failure("Error when loading 'tco3_zm'")
def __get_tco3_zm(self, tco3_zm, **kwarg): def __get_tco3_zm(self, tco3_zm, **kwarg):
"""Gets and standarises the tco3_zm data""" """Gets and standarises the tco3_zm data"""
fnames = glob.glob(tco3_zm['dir'] + "/*.nc") with xr.open_mfdataset(tco3_zm['paths']) as dataset:
with xr.open_mfdataset(fnames) as dataset:
dataset = dataset.rename({ dataset = dataset.rename({
tco3_zm['name']: 'tco3_zm', tco3_zm['name']: 'tco3_zm',
tco3_zm['coordinades']['time']: 'time', tco3_zm['coordinades']['time']: 'time',
...@@ -62,8 +61,7 @@ class Model: ...@@ -62,8 +61,7 @@ class Model:
@utils.return_on_failure("Error when loading 'vmro3_zm'") @utils.return_on_failure("Error when loading 'vmro3_zm'")
def __get_vmro3_zm(self, vmro3_zm, **kwarg): def __get_vmro3_zm(self, vmro3_zm, **kwarg):
"""Gets and standarises the vmro3_zm data""" """Gets and standarises the vmro3_zm data"""
fnames = glob.glob(vmro3_zm['dir'] + "/*.nc") with xr.open_mfdataset(vmro3_zm['paths']) as dataset:
with xr.open_mfdataset(fnames) as dataset:
dataset = dataset.rename({ dataset = dataset.rename({
vmro3_zm['name']: 'vmro3_zm', vmro3_zm['name']: 'vmro3_zm',
vmro3_zm['coordinades']['time']: 'time', vmro3_zm['coordinades']['time']: 'time',
......
...@@ -50,10 +50,10 @@ def create_empty_netCDF(fname): ...@@ -50,10 +50,10 @@ def create_empty_netCDF(fname):
root_grp.close() root_grp.close()
def to_netcdf(path, name, dataset): def to_netcdf(dirname, name, dataset):
"""Creates or appends data to named netcdf files""" """Creates or appends data to named netcdf files"""
years, dsx = zip(*dataset.groupby("time.year")) years, dsx = zip(*dataset.groupby("time.year"))
fnames = [path + "/" + name + "_%s.nc" % y for y in years] fnames = [dirname + "/" + name + "_%s.nc" % y for y in years]
logging.info("Save dataset into: %s", fnames) logging.info("Save dataset into: %s", fnames)
[create_empty_netCDF(fn) for fn in fnames if not os.path.isfile(fn)] [create_empty_netCDF(fn) for fn in fnames if not os.path.isfile(fn)]
xr.save_mfdataset(dsx, fnames, mode='a') xr.save_mfdataset(dsx, fnames, mode='a')
...@@ -23,7 +23,7 @@ ...@@ -23,7 +23,7 @@
# expected to have only one folder output named "CCMI-1_IPSL". # expected to have only one folder output named "CCMI-1_IPSL".
# #
# This model has 2 variables (tco3_zm and vmro3_zm) which datasets are # This model has 2 variables (tco3_zm and vmro3_zm) which datasets are
# located in different directories. Therefore the key 'dir' is different # located in different directories. Therefore the key 'paths' is different
# in both of them. Therefore, the output expected at "CCMI-1_IPSL" is # in both of them. Therefore, the output expected at "CCMI-1_IPSL" is
# 2 type of files: # 2 type of files:
# - tco3_zm_[YEAR].nc: With tco3 skimmed data # - tco3_zm_[YEAR].nc: With tco3 skimmed data
...@@ -44,7 +44,7 @@ CCMI-1: ...@@ -44,7 +44,7 @@ CCMI-1:
name: toz name: toz
# Path where to find the netCDF files # Path where to find the netCDF files
# [FIXED_KEY -- MANDATORY]: [CORRECT_VALUE -- MANDATORY] # [FIXED_KEY -- MANDATORY]: [CORRECT_VALUE -- MANDATORY]
dir: Ccmi/mon/toz paths: Ccmi/mon/toz/*.nc
# Coordinades description for tco3 data. # Coordinades description for tco3 data.
# [FIXED_KEY -- MANDATORY]: # [FIXED_KEY -- MANDATORY]:
coordinades: coordinades:
...@@ -62,7 +62,7 @@ CCMI-1: ...@@ -62,7 +62,7 @@ CCMI-1:
name: vmro3 name: vmro3
# Path where to find the netCDF files # Path where to find the netCDF files
# [FIXED_KEY -- MANDATORY]: [CORRECT_VALUE -- MANDATORY] # [FIXED_KEY -- MANDATORY]: [CORRECT_VALUE -- MANDATORY]
dir: Ccmi/mon/vmro3 paths: Ccmi/mon/vmro3/*_????.nc
# Coordinades description for vmro3 data. # Coordinades description for vmro3 data.
# [FIXED_KEY -- MANDATORY]: # [FIXED_KEY -- MANDATORY]:
coordinades: coordinades:
...@@ -85,7 +85,7 @@ CCMI-1: ...@@ -85,7 +85,7 @@ CCMI-1:
# #
# This case of ERA-i indeed has 2 variables (tco3_zm and vmro3_zm) but in # This case of ERA-i indeed has 2 variables (tco3_zm and vmro3_zm) but in
# this case, are located inside the same dataset files, therefore the # this case, are located inside the same dataset files, therefore the
# key 'dir' is the same in both variables. The output expected at # key 'paths' is the same in both variables. The output expected at
# "ECMWF_ERA-5" is 2 type of files: # "ECMWF_ERA-5" is 2 type of files:
# - tco3_zm_[YEAR].nc: With tco3 skimmed data # - tco3_zm_[YEAR].nc: With tco3 skimmed data
# - vmro3_zm_[YEAR].nc: With vmro3 skimmed data # - vmro3_zm_[YEAR].nc: With vmro3 skimmed data
...@@ -105,7 +105,7 @@ ECMWF: ...@@ -105,7 +105,7 @@ ECMWF:
name: tco3 name: tco3
# Path where to find the netCDF files # Path where to find the netCDF files
# [FIXED_KEY -- MANDATORY]: [CORRECT_VALUE -- MANDATORY] # [FIXED_KEY -- MANDATORY]: [CORRECT_VALUE -- MANDATORY]
dir: Ecmwf/Era5 paths: Ecmwf/Era5/*.nc
# Coordinades description for tco3 data. # Coordinades description for tco3 data.
# [FIXED_KEY -- MANDATORY]: # [FIXED_KEY -- MANDATORY]:
coordinades: coordinades:
...@@ -123,7 +123,7 @@ ECMWF: ...@@ -123,7 +123,7 @@ ECMWF:
name: toz name: toz
# Path where to find the netCDF files # Path where to find the netCDF files
# [FIXED_KEY -- MANDATORY]: [CORRECT_VALUE -- MANDATORY] # [FIXED_KEY -- MANDATORY]: [CORRECT_VALUE -- MANDATORY]
dir: Ecmwf/Erai paths: Ecmwf/Erai/*.nc
# Coordinades description for tco3 data. # Coordinades description for tco3 data.
# [FIXED_KEY -- MANDATORY]: # [FIXED_KEY -- MANDATORY]:
coordinades: coordinades:
...@@ -141,7 +141,7 @@ ECMWF: ...@@ -141,7 +141,7 @@ ECMWF:
name: vmro3 name: vmro3
# Path where to find the netCDF files # Path where to find the netCDF files
# [FIXED_KEY -- MANDATORY]: [CORRECT_VALUE -- MANDATORY] # [FIXED_KEY -- MANDATORY]: [CORRECT_VALUE -- MANDATORY]
dir: Ecmwf/Erai paths: Ecmwf/Erai/*.nc
# Coordinades description for vmro3 data. # Coordinades description for vmro3 data.
# [FIXED_KEY -- MANDATORY]: # [FIXED_KEY -- MANDATORY]:
coordinades: coordinades:
......
...@@ -2,9 +2,8 @@ ...@@ -2,9 +2,8 @@
import xarray as xr import xarray as xr
import numpy as np import numpy as np
import netCDF4
import os.path
import datetime import datetime
import os
from o3skim import utils from o3skim import utils
...@@ -37,7 +36,7 @@ def dataset(name, coordinades): ...@@ -37,7 +36,7 @@ def dataset(name, coordinades):
) )
def netcdf(path, name, coordinades, **kwarg): def netcdf(dirname, name, coordinades, **kwarg):
"""Creates or appends data to a mock netcdf file""" """Creates or appends data to a mock netcdf file"""
ds = dataset(name, coordinades) ds = dataset(name, coordinades)
utils.to_netcdf(path, name, ds) utils.to_netcdf(dirname, name, ds)
"""This modules creates mockup data for testing"""
import xarray as xr
import numpy as np
import datetime
import random
# Reference epoch for the mock time axis.
base = datetime.datetime(2000, 1, 1)

# Coordinate values shared by every mock dataset produced by this module:
# 99 timestamps spaced 9 days apart, plus pressure-level, latitude and
# longitude grids.  NOTE: treated as shared module state by the helpers below.
indexes = {
    'time': [base + datetime.timedelta(days=9 * i) for i in range(99)],
    'plev': list(range(1, 1000, 100)),
    'lat': list(range(-90, 90, 10)),
    'lon': list(range(-180, 180, 20)),
}
def data_vars():
    """Create a mock n-dimensional array of ones spanning the module indexes.

    Returns a ``(dims, values)`` 2-tuple in the form accepted by the
    ``xarray.Dataset`` data_vars mapping: the tuple of dimension names
    and an ndarray of ones with one axis per coordinate.
    """
    # Only the axis lengths are needed, so iterate the values directly.
    shape = [len(axis) for axis in indexes.values()]
    return tuple(indexes), np.ones(shape)
def data_coord():
    """Return the module-level coordinate mapping for mock datasets."""
    # Intentionally hands back the shared dict (not a copy): the module
    # treats `indexes` as global state common to all generated data.
    coords = indexes
    return coords
def dataset(name):
    """Build an xarray Dataset holding one variable *name* over the module indexes."""
    variables = {name: data_vars()}
    return xr.Dataset(variables, coords=data_coord())
def netcdf(path, name, **kwarg):
    """Write a noise dataset for variable *name* to a netCDF file at *path*.

    Extra keyword arguments are accepted (and ignored) so config entries
    can be splatted in directly.
    """
    dataset(name).to_netcdf(path)
# Decoy netCDF files placed alongside the real model data; the loader is
# expected to ignore them (presumably the regression fixture for the
# "other nc files in folder conflict the loading" branch -- see merge
# description).  Each entry names the variable and the noise file path.
- name: toz
  path: Ccmi/mon/toz/toz_noise.nc
- name: tco3
  path: Ecmwf/Era5/tco3_noise.nc
- name: toz
  path: Ecmwf/Erai/toz_noise.nc
- name: vmro3
  path: Ecmwf/Erai/vmro3_noise.nc
...@@ -2,14 +2,14 @@ CCMI-1: ...@@ -2,14 +2,14 @@ CCMI-1:
IPSL: IPSL:
tco3_zm: tco3_zm:
name: toz name: toz
dir: Ccmi/mon/toz paths: Ccmi/mon/toz/toz_????.nc
coordinades: coordinades:
time: time time: time
lat: lat lat: lat
lon: lon lon: lon
vmro3_zm: vmro3_zm:
name: vmro3 name: vmro3
dir: Ccmi/mon/vmro3 paths: Ccmi/mon/vmro3/*.nc
coordinades: coordinades:
time: time time: time
plev: plev plev: plev
...@@ -19,7 +19,7 @@ ECMWF: ...@@ -19,7 +19,7 @@ ECMWF:
ERA-5: ERA-5:
tco3_zm: tco3_zm:
name: tco3 name: tco3
dir: Ecmwf/Era5 paths: Ecmwf/Era5/tco3_????.nc
coordinades: coordinades:
lon: longitude lon: longitude
lat: latitude lat: latitude
...@@ -27,14 +27,14 @@ ECMWF: ...@@ -27,14 +27,14 @@ ECMWF:
ERA-i: ERA-i:
tco3_zm: tco3_zm:
name: toz name: toz
dir: Ecmwf/Erai paths: Ecmwf/Erai/toz_????.nc
coordinades: coordinades:
time: time time: time
lat: latitude lat: latitude
lon: longitude lon: longitude
vmro3_zm: vmro3_zm:
name: vmro3 name: vmro3
dir: Ecmwf/Erai paths: Ecmwf/Erai/vmro3_????.nc
coordinades: coordinades:
time: time time: time
plev: level plev: level
......
...@@ -2,7 +2,7 @@ ECMWF: ...@@ -2,7 +2,7 @@ ECMWF:
ERA-i: ERA-i:
vmro3_zm: # Correct variable vmro3_zm: # Correct variable
name: vmro3 name: vmro3
dir: Ecmwf/Erai paths: Ecmwf/Erai/vmro3_????.nc
coordinades: coordinades:
time: time time: time
plev: level plev: level
...@@ -10,7 +10,7 @@ ECMWF: ...@@ -10,7 +10,7 @@ ECMWF:
lon: longitude lon: longitude
tco3_zm: # Incorrect variable tco3_zm: # Incorrect variable
name: non_existing_var name: non_existing_var
dir: Ecmwf/Erai paths: Ecmwf/Erai/toz_????.nc
coordinades: coordinades:
time: time time: time
lat: latitude lat: latitude
......
...@@ -10,6 +10,7 @@ import glob ...@@ -10,6 +10,7 @@ import glob
from o3skim import sources, utils from o3skim import sources, utils
# from pyfakefs.fake_filesystem_unittest import TestCase # from pyfakefs.fake_filesystem_unittest import TestCase
from . import mockup_data from . import mockup_data
from . import mockup_noise
class TestO3SKIM_sources(unittest.TestCase): class TestO3SKIM_sources(unittest.TestCase):
...@@ -25,22 +26,31 @@ class TestO3SKIM_sources(unittest.TestCase): ...@@ -25,22 +26,31 @@ class TestO3SKIM_sources(unittest.TestCase):
self.create_mock_datasets() self.create_mock_datasets()
self.backup_datasets() self.backup_datasets()
self.assert_with_backup() self.assert_with_backup()
self.create_noise_datasets()
def tearDown(self): def tearDown(self):
"""Tear down test fixtures, if any.""" """Tear down test fixtures, if any."""
def create_mock_datasets(self): def create_mock_datasets(self):
"""Creates mock data files according to the loaded configuration""" """Creates mock data files according to the loaded configuration"""
for _, collection in self.config_base.items(): with utils.cd('data'):
for _, variables in collection.items(): for _, collection in self.config_base.items():
for _, vinfo in variables.items(): for _, variables in collection.items():
path = "data/" + vinfo["dir"] for _, vinfo in variables.items():
os.makedirs(path, exist_ok=True) dirname = os.path.dirname(vinfo['paths'])
mockup_data.netcdf(path, **vinfo) os.makedirs(dirname, exist_ok=True)
mockup_data.netcdf(dirname, **vinfo)
def create_noise_datasets(self):
"""Creates noise data files according to the noise configuration"""
config_noise = utils.load("tests/noise_files.yaml")
with utils.cd('data'):
for ninfo in config_noise:
mockup_noise.netcdf(**ninfo)
def clean_output(self): def clean_output(self):
"""Cleans output removing all folders at output""" """Cleans output removing all folders at output"""
with utils.cd("output"): with utils.cd('output'):
directories = (d for d in os.listdir() if os.path.isdir(d)) directories = (d for d in os.listdir() if os.path.isdir(d))
for directory in directories: for directory in directories:
shutil.rmtree(directory) shutil.rmtree(directory)
...@@ -48,24 +58,24 @@ class TestO3SKIM_sources(unittest.TestCase): ...@@ -48,24 +58,24 @@ class TestO3SKIM_sources(unittest.TestCase):
def backup_datasets(self): def backup_datasets(self):
"""Loads the mock datasets into an internal variable""" """Loads the mock datasets into an internal variable"""
self.ds_backup = {} self.ds_backup = {}
for source, collection in self.config_base.items(): with utils.cd('data'):
self.ds_backup[source] = {} for source, collection in self.config_base.items():
for model, variables in collection.items(): self.ds_backup[source] = {}
self.ds_backup[source][model] = {} for model, variables in collection.items():
for v, vinfo in variables.items(): self.ds_backup[source][model] = {}
paths = "data/" + vinfo["dir"] + "/*.nc" for v, vinfo in variables.items():
with xr.open_mfdataset(paths) as ds: with xr.open_mfdataset(vinfo['paths']) as ds:
self.ds_backup[source][model][v] = ds self.ds_backup[source][model][v] = ds
def assert_with_backup(self): def assert_with_backup(self):
"""Asserts the dataset in the backup is equal to the config load""" """Asserts the dataset in the backup is equal to the config load"""
for source, collection in self.config_base.items(): with utils.cd('data'):
for model, variables in collection.items(): for source, collection in self.config_base.items():
for v, vinfo in variables.items(): for model, variables in collection.items():
paths = "data/" + vinfo["dir"] + "/*.nc" for v, vinfo in variables.items():
with xr.open_mfdataset(paths) as ds: with xr.open_mfdataset(vinfo['paths']) as ds:
xr.testing.assert_identical( xr.testing.assert_identical(
self.ds_backup[source][model][v], ds) self.ds_backup[source][model][v], ds)
def test_001_SourcesFromConfig(self): def test_001_SourcesFromConfig(self):
"""Creates the different sources from the configuration file""" """Creates the different sources from the configuration file"""
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment