Commit 4ec24945 authored by BorjaEst

Merge branch 'dev' into 24-generate-documentation

parents c0cb1972 aacf5f7a
// For format details, see https://aka.ms/vscode-remote/devcontainer.json or this file's README at:
// https://github.com/microsoft/vscode-dev-containers/tree/v0.134.1/containers/docker-existing-dockerfile
{
    "name": "Existing Dockerfile",
    // Sets the run context to one level up instead of the .devcontainer folder.
    "context": "..",
    // Update the 'dockerFile' property if you aren't using the standard 'Dockerfile' filename.
    "dockerFile": "../Dockerfile",
    // Set *default* container specific settings.json values on container create.
    "settings": {
        "terminal.integrated.shell.linux": "/bin/bash"
    },
    // Add the IDs of extensions you want installed when the container is created.
    "extensions": [
        "ms-python.python",
        "lextudio.restructuredtext"
    ]
    // Use 'forwardPorts' to make a list of ports inside the container available locally.
    // "forwardPorts": [],
    // Uncomment the next line to run commands after the container is created - for example installing curl.
    // "postCreateCommand": "apt-get update && apt-get install -y curl",
    // Uncomment when using a ptrace-based debugger like C++, Go, and Rust
    // "runArgs": [ "--cap-add=SYS_PTRACE", "--security-opt", "seccomp=unconfined" ],
    // Uncomment to use the Docker CLI from inside the container. See https://aka.ms/vscode-remote/samples/docker-from-docker.
    // "mounts": [ "source=/var/run/docker.sock,target=/var/run/docker.sock,type=bind" ],
    // Uncomment to connect as a non-root user if you've added one. See https://aka.ms/vscode-remote/containers/non-root.
    // "remoteUser": "vscode"
}
@@ -61,5 +61,5 @@ USER ${user}
 # Start default script
 ENTRYPOINT [ "main" ]
-CMD [ "-v 1" ]
+CMD [ "--verbosity ERROR" ]
@@ -16,6 +16,9 @@ def cmdline_args():
     # Arguments
     p.add_argument("-f", "--sources_file", type=str, default="./sources.yaml",
                    help="custom sources YAML configuration (default: %(default)s)")
+    p.add_argument("-s", "--split_by", type=str, default=None,
+                   choices=['year', 'decade'],
+                   help="Period time to split output (default: %(default)s)")
     p.add_argument("-v", "--verbosity", type=str, default='ERROR',
                    choices=['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'],
                    help="Sets the logging level (default: %(default)s)")
@@ -42,4 +45,4 @@ if __name__ == '__main__':
     # Skim output
     logging.info("Skimming data to './output' ")
     with utils.cd("output"):
-        [source.skim() for source in ds.values()]
+        [source.skim(groupby=args.split_by) for source in ds.values()]
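The new `--split_by` flag is what feeds the `groupby` argument above. A minimal sketch of the flag's behaviour, reconstructing only the parser fragment from this diff (the standalone parser `p` is illustrative, not the full `cmdline_args()`):

```python
import argparse

# Rebuild just the option added in this commit (illustrative, standalone).
p = argparse.ArgumentParser()
p.add_argument("-s", "--split_by", type=str, default=None,
               choices=['year', 'decade'],
               help="Period time to split output (default: %(default)s)")

args = p.parse_args(["--split_by", "decade"])
assert args.split_by == "decade"

# Without the flag, the default None is forwarded, so skim() does not split.
assert p.parse_args([]).split_by is None
```

Anything outside `year`/`decade` is rejected by argparse itself, so `skim()` only ever sees the three valid values.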
@@ -39,14 +39,17 @@ class Source:
             logging.info("Load model '%s'", name)
             self._models[name] = Model(variables)

-    def skim(self):
+    def skim(self, groupby=None):
         """Request to skim all source data into the current folder

+        :param groupby: How to group output (None, year, decade).
+        :type groupby: str, optional
         """
         for name, model in self._models.items():
-            path = self._name + "_" + name
-            os.makedirs(path, exist_ok=True)
-            logger.info("Skim data from '%s'", path)
-            model.skim(path)
+            dirname = self._name + "_" + name
+            os.makedirs(dirname, exist_ok=True)
+            logger.info("Skim data from '%s'", dirname)
+            model.skim(dirname, groupby)
class Model:
@@ -67,24 +70,26 @@ class Model:
             logger.debug("Load 'vmro3_zm' data")
             self.__get_vmro3_zm(**variables)

-    def skim(self, path):
+    def skim(self, dirname, groupby=None):
         """Request to skim all source data into the specified path

-        :param path: Path where to place the output files
-        :type path: str
+        :param dirname: Path where to place the output files.
+        :type dirname: str
+        :param groupby: How to group output (None, year, decade).
+        :type groupby: str, optional
         """
         if hasattr(self, '_tco3_zm'):
             logger.debug("Skim 'tco3_zm' data")
-            utils.to_netcdf(path, "tco3_zm", self._tco3_zm)
+            utils.to_netcdf(dirname, "tco3_zm", self._tco3_zm, groupby)
         if hasattr(self, '_vmro3_zm'):
             logger.debug("Skim 'vmro3_zm' data")
-            utils.to_netcdf(path, "vmro3_zm", self._vmro3_zm)
+            utils.to_netcdf(dirname, "vmro3_zm", self._vmro3_zm, groupby)
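The `hasattr` guards above exist because the loader methods below are wrapped in `utils.return_on_failure`, which logs the given message and swallows the error so one broken variable does not abort the whole skim (see `test_004_SourceErrorDontBreak` at the end of this diff). A minimal sketch of what such a decorator could look like; this is an assumption for illustration, not o3skim's actual implementation:

```python
import functools
import logging

logger = logging.getLogger(__name__)

def return_on_failure(message):
    """Hypothetical sketch: log `message` and return None if the call raises."""
    def decorator(function):
        @functools.wraps(function)
        def wrapper(*args, **kwargs):
            try:
                return function(*args, **kwargs)
            except Exception:
                # Swallow the error so the caller can keep processing sources.
                logger.error(message, exc_info=True)
                return None
        return wrapper
    return decorator
```

Under this reading, a failed `__get_tco3_zm` simply never sets `self._tco3_zm`, and `skim()` skips that variable.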
     @utils.return_on_failure("Error when loading 'tco3_zm'")
     def __get_tco3_zm(self, tco3_zm, **kwarg):
         """Gets and standardises the tco3_zm data"""
-        fnames = glob.glob(tco3_zm['dir'] + "/*.nc")
-        with xr.open_mfdataset(fnames) as dataset:
+        with xr.open_mfdataset(tco3_zm['paths']) as dataset:
             dataset = dataset.rename({
                 tco3_zm['name']: 'tco3_zm',
                 tco3_zm['coordinates']['time']: 'time',
@@ -96,8 +101,7 @@ class Model:
     @utils.return_on_failure("Error when loading 'vmro3_zm'")
     def __get_vmro3_zm(self, vmro3_zm, **kwarg):
         """Gets and standardises the vmro3_zm data"""
-        fnames = glob.glob(vmro3_zm['dir'] + "/*.nc")
-        with xr.open_mfdataset(fnames) as dataset:
+        with xr.open_mfdataset(vmro3_zm['paths']) as dataset:
             dataset = dataset.rename({
                 vmro3_zm['name']: 'vmro3_zm',
                 vmro3_zm['coordinates']['time']: 'time',
@@ -82,7 +82,7 @@ def create_empty_netCDF(fname):
     root_grp.close()

-def to_netcdf(path, name, dataset):
+def to_netcdf(dirname, name, dataset, groupby=None):
    """Creates or appends data to named netCDF files.

    :param dirname: Location where to find or create the netCDF files.
@@ -93,9 +93,35 @@ def to_netcdf(path, name, dataset):
    :param dataset: Dataset to write to the netCDF file.
    :type dataset: :class:`xarray.Dataset`

+    :param groupby: How to group files (None, year, decade).
+    :type groupby: str, optional
    """
-    years, dsx = zip(*dataset.groupby("time.year"))
-    fnames = [path + "/" + name + "_%s.nc" % y for y in years]
+    def split_by_year(dataset):
+        """Splits a dataset by year"""
+        years, dsx = zip(*dataset.groupby("time.year"))
+        fnames = [dirname + "/" + name + "_%s.nc" % y for y in years]
+        return fnames, dsx
+
+    def split_by_decade(dataset):
+        """Splits a dataset by decade"""
+        decades = dataset.indexes["time"].year // 10 * 10
+        decades, dsx = zip(*dataset.groupby(xr.DataArray(decades)))
+        fnames = [dirname + "/" + name + "_%s-%s.nc" % (d, d + 10) for d in decades]
+        return fnames, dsx
+
+    def no_split(dataset):
+        """Does not split a dataset"""
+        dsx = (dataset,)
+        fnames = [dirname + "/" + name + ".nc"]
+        return fnames, dsx
+
+    split_by = {
+        "year": split_by_year,
+        "decade": split_by_decade
+    }
+    fnames, dsx = split_by.get(groupby, no_split)(dataset)
     logging.info("Save dataset into: %s", fnames)
     [create_empty_netCDF(fn) for fn in fnames if not os.path.isfile(fn)]
     xr.save_mfdataset(dsx, fnames, mode='a')
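To see what the three strategies produce, here is a self-contained sketch with a synthetic dataset; the variable name `tco3_zm` matches the code above, while the dates and sizes are illustrative:

```python
import numpy as np
import pandas as pd
import xarray as xr

# Toy zonal-mean dataset: 240 monthly steps spanning 1995-2014.
time = pd.date_range("1995-01-01", periods=240, freq="MS")
ds = xr.Dataset({"tco3_zm": ("time", np.random.rand(240))},
                coords={"time": time})

# groupby="year": one file per calendar year, as in split_by_year.
years, groups = zip(*ds.groupby("time.year"))
fnames = ["tco3_zm_%s.nc" % y for y in years]
print(fnames[0], fnames[-1])   # tco3_zm_1995.nc tco3_zm_2014.nc

# groupby="decade": the same integer arithmetic as split_by_decade,
# mapping e.g. 1998 -> 1990 and 2004 -> 2000.
decades = ds.indexes["time"].year // 10 * 10
print(sorted(set(decades)))    # [1990, 2000, 2010]
```

Note the `split_by.get(groupby, no_split)` dispatch: any unrecognised value silently falls back to a single unsplit file, which is also the behaviour for the default `groupby=None`.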
@@ -2,9 +2,8 @@
 import xarray as xr
 import numpy as np
 import netCDF4
-import os.path
 import datetime
+import os

 from o3skim import utils
@@ -30,14 +29,15 @@ def map_coord(coordinates):

 def dataset(name, coordinates):
-    """Creates a dataset acording to the global module indexes"""
+    """Creates a dataset according to the global module indexes"""
     return xr.Dataset(
         {name: data_vars(coordinates)},
         coords=map_coord(coordinates)
     )

-def netcdf(path, name, coordinates, **kwarg):
+def netcdf(dirname, name, coordinates, **kwarg):
     """Creates or appends data to a mock netcdf file"""
     ds = dataset(name, coordinates)
-    utils.to_netcdf(path, name, ds)
+    utils.to_netcdf(dirname, name, ds, groupby="year")

"""This module creates mockup noise data for testing"""
import xarray as xr
import numpy as np
import datetime
import random

base = datetime.datetime(2000, 1, 1)
indexes = {
    'time': [base + datetime.timedelta(days=9*i) for i in range(99)],
    'plev': [x for x in range(1, 1000, 100)],
    'lat': [x for x in range(-90, 90, 10)],
    'lon': [x for x in range(-180, 180, 20)]
}

def data_vars():
    """Creates a mock n-array with coordinate values"""
    dim = [len(axis) for _, axis in indexes.items()]
    return tuple(indexes), np.ones(dim)

def data_coord():
    """Creates mock coordinates"""
    return indexes

def dataset(name):
    """Creates a dataset according to the global module indexes"""
    return xr.Dataset(
        {name: data_vars()},
        coords=data_coord()
    )

def netcdf(path, name, **kwarg):
    """Creates or appends data to a noise netcdf file"""
    ds = dataset(name)
    ds.to_netcdf(path)
- name: toz
  path: Ccmi/mon/toz/toz_noise.nc
- name: tco3
  path: Ecmwf/Era5/tco3_noise.nc
- name: toz
  path: Ecmwf/Erai/toz_noise.nc
- name: vmro3
  path: Ecmwf/Erai/vmro3_noise.nc
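These entries are consumed by the new `create_noise_datasets` fixture further down, which loads this YAML and forwards each mapping to `mockup_noise.netcdf` as keyword arguments. Roughly, assuming `utils.load` parses the YAML into a list of dicts (the direct `yaml.safe_load` here is an illustrative stand-in):

```python
import os
import yaml

with open("tests/noise_files.yaml") as stream:
    config_noise = yaml.safe_load(stream)

for ninfo in config_noise:
    # Each entry becomes mockup_noise.netcdf(name=..., path=...).
    os.makedirs(os.path.dirname(ninfo["path"]), exist_ok=True)
    print("noise file:", ninfo["path"], "variable:", ninfo["name"])
```

Because the noise files sit inside the same directories as the mock data, they stay invisible to the skim only thanks to the precise `paths` globs in the source configurations below.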
@@ -2,14 +2,14 @@ CCMI-1:
   IPSL:
     tco3_zm:
       name: toz
-      dir: Ccmi/mon/toz
+      paths: Ccmi/mon/toz/toz_????.nc
       coordinates:
         time: time
         lat: lat
         lon: lon
     vmro3_zm:
       name: vmro3
-      dir: Ccmi/mon/vmro3
+      paths: Ccmi/mon/vmro3/*.nc
       coordinates:
         time: time
         plev: plev
@@ -19,7 +19,7 @@ ECMWF:
   ERA-5:
     tco3_zm:
       name: tco3
-      dir: Ecmwf/Era5
+      paths: Ecmwf/Era5/tco3_????.nc
       coordinates:
         lon: longitude
         lat: latitude
@@ -27,14 +27,14 @@ ECMWF:
   ERA-i:
     tco3_zm:
       name: toz
-      dir: Ecmwf/Erai
+      paths: Ecmwf/Erai/toz_????.nc
       coordinates:
         time: time
         lat: latitude
         lon: longitude
     vmro3_zm:
       name: vmro3
-      dir: Ecmwf/Erai
+      paths: Ecmwf/Erai/vmro3_????.nc
       coordinates:
         time: time
         plev: level
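The `dir` keys become `paths` glob patterns because `xarray.open_mfdataset` expands glob strings itself, which is what let the loaders earlier in this diff drop their explicit `glob.glob` calls and lets each variable match only its own files. A minimal sketch reusing one of the patterns above (assumes the matching files exist on disk):

```python
import xarray as xr

# open_mfdataset accepts a glob pattern directly; no glob.glob needed.
with xr.open_mfdataset("Ecmwf/Era5/tco3_????.nc") as dataset:
    print(dataset["tco3"])   # variable name as declared under 'name:' above
```

The tighter `toz_????.nc`-style patterns also keep the new `*_noise.nc` files out of the real datasets, which is exactly what the noise tests below rely on.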
@@ -2,7 +2,7 @@ ECMWF:
   ERA-i:
     vmro3_zm: # Correct variable
       name: vmro3
-      dir: Ecmwf/Erai
+      paths: Ecmwf/Erai/vmro3_????.nc
       coordinates:
         time: time
         plev: level
@@ -10,7 +10,7 @@ ECMWF:
         lon: longitude
     tco3_zm: # Incorrect variable
       name: non_existing_var
-      dir: Ecmwf/Erai
+      paths: Ecmwf/Erai/toz_????.nc
       coordinates:
         time: time
         lat: latitude
@@ -10,6 +10,7 @@ import glob
 from o3skim import sources, utils
 # from pyfakefs.fake_filesystem_unittest import TestCase
 from . import mockup_data
+from . import mockup_noise
class TestO3SKIM_sources(unittest.TestCase):
@@ -25,22 +26,31 @@ class TestO3SKIM_sources(unittest.TestCase):
         self.create_mock_datasets()
         self.backup_datasets()
         self.assert_with_backup()
+        self.create_noise_datasets()

     def tearDown(self):
         """Tear down test fixtures, if any."""

     def create_mock_datasets(self):
         """Creates mock data files according to the loaded configuration"""
-        for _, collection in self.config_base.items():
-            for _, variables in collection.items():
-                for _, vinfo in variables.items():
-                    path = "data/" + vinfo["dir"]
-                    os.makedirs(path, exist_ok=True)
-                    mockup_data.netcdf(path, **vinfo)
+        with utils.cd('data'):
+            for _, collection in self.config_base.items():
+                for _, variables in collection.items():
+                    for _, vinfo in variables.items():
+                        dirname = os.path.dirname(vinfo['paths'])
+                        os.makedirs(dirname, exist_ok=True)
+                        mockup_data.netcdf(dirname, **vinfo)
+
+    def create_noise_datasets(self):
+        """Creates noise data files according to the noise configuration"""
+        config_noise = utils.load("tests/noise_files.yaml")
+        with utils.cd('data'):
+            for ninfo in config_noise:
+                mockup_noise.netcdf(**ninfo)

     def clean_output(self):
         """Cleans the output folder, removing all directories inside"""
-        with utils.cd("output"):
+        with utils.cd('output'):
             directories = (d for d in os.listdir() if os.path.isdir(d))
             for directory in directories:
                 shutil.rmtree(directory)
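The rewritten fixtures lean on `utils.cd` to run every step relative to `data/` or `output/` instead of concatenating path prefixes by hand. A minimal sketch of what such a context manager could look like; an assumption for illustration, not o3skim's actual code:

```python
import contextlib
import os

@contextlib.contextmanager
def cd(path):
    """Hypothetical sketch of utils.cd: enter `path`, always restore the old cwd."""
    prev = os.getcwd()
    os.chdir(path)
    try:
        yield
    finally:
        os.chdir(prev)  # restored even if the body raises
```

Restoring the working directory in `finally` matters here: a failing assertion inside `with utils.cd('data')` would otherwise leave later tests running in the wrong folder.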
@@ -48,24 +58,24 @@ class TestO3SKIM_sources(unittest.TestCase):
     def backup_datasets(self):
         """Loads the mock datasets into an internal variable"""
         self.ds_backup = {}
-        for source, collection in self.config_base.items():
-            self.ds_backup[source] = {}
-            for model, variables in collection.items():
-                self.ds_backup[source][model] = {}
-                for v, vinfo in variables.items():
-                    paths = "data/" + vinfo["dir"] + "/*.nc"
-                    with xr.open_mfdataset(paths) as ds:
-                        self.ds_backup[source][model][v] = ds
+        with utils.cd('data'):
+            for source, collection in self.config_base.items():
+                self.ds_backup[source] = {}
+                for model, variables in collection.items():
+                    self.ds_backup[source][model] = {}
+                    for v, vinfo in variables.items():
+                        with xr.open_mfdataset(vinfo['paths']) as ds:
+                            self.ds_backup[source][model][v] = ds
     def assert_with_backup(self):
         """Asserts the dataset in the backup is equal to the config load"""
-        for source, collection in self.config_base.items():
-            for model, variables in collection.items():
-                for v, vinfo in variables.items():
-                    paths = "data/" + vinfo["dir"] + "/*.nc"
-                    with xr.open_mfdataset(paths) as ds:
-                        xr.testing.assert_identical(
-                            self.ds_backup[source][model][v], ds)
+        with utils.cd('data'):
+            for source, collection in self.config_base.items():
+                for model, variables in collection.items():
+                    for v, vinfo in variables.items():
+                        with xr.open_mfdataset(vinfo['paths']) as ds:
+                            xr.testing.assert_identical(
+                                self.ds_backup[source][model][v], ds)
def test_001_SourcesFromConfig(self):
"""Creates the different sources from the configuration file"""
@@ -96,6 +106,32 @@ class TestO3SKIM_sources(unittest.TestCase):
         with utils.cd("output"):
             [source.skim() for source in ds.values()]

+        # CCMI-1 data skim asserts
+        self.assertTrue(os.path.isdir("output/CCMI-1_IPSL"))
+        self.assertTrue(os.path.exists("output/CCMI-1_IPSL/tco3_zm.nc"))
+        self.assertTrue(os.path.exists("output/CCMI-1_IPSL/vmro3_zm.nc"))
+
+        # ECMWF data skim asserts
+        self.assertTrue(os.path.isdir("output/ECMWF_ERA-5"))
+        self.assertTrue(os.path.exists("output/ECMWF_ERA-5/tco3_zm.nc"))
+        self.assertTrue(os.path.isdir("output/ECMWF_ERA-i"))
+        self.assertTrue(os.path.exists("output/ECMWF_ERA-i/tco3_zm.nc"))
+        self.assertTrue(os.path.exists("output/ECMWF_ERA-i/vmro3_zm.nc"))
+
+        # Checks the original data has not been modified
+        self.assert_with_backup()
+        # Removes output data for other tests
+        self.clean_output()
+
+    def test_003_OutputSplitByYear(self):
+        """Skims the data into the output folder splitting by year"""
+        with utils.cd("data"):
+            ds = {name: sources.Source(name, collection) for
+                  name, collection in self.config_base.items()}
+        with utils.cd("output"):
+            [source.skim(groupby="year") for source in ds.values()]
+
+        # CCMI-1 data skim asserts
+        self.assertTrue(os.path.isdir("output/CCMI-1_IPSL"))
+        self.assertTrue(os.path.exists("output/CCMI-1_IPSL/tco3_zm_2000.nc"))
@@ -113,7 +149,7 @@ class TestO3SKIM_sources(unittest.TestCase):
         # Removes output data for other tests
         self.clean_output()

-    def test_003_SourceErrorDontBreak(self):
+    def test_004_SourceErrorDontBreak(self):
         """The execution does not stop on an error in a source"""
         with utils.cd("data"):
             ds = {name: sources.Source(name, collection) for
@@ -124,7 +160,7 @@ class TestO3SKIM_sources(unittest.TestCase):
         # ECMWF data skim asserts
         self.assertTrue(os.path.isdir("output/ECMWF_ERA-i"))
-        self.assertTrue(os.path.exists("output/ECMWF_ERA-i/vmro3_zm_2000.nc"))
+        self.assertTrue(os.path.exists("output/ECMWF_ERA-i/vmro3_zm.nc"))

         # Checks the original data has not been modified
         self.assert_with_backup()