Commit 619b4b8d authored by BorjaEst

Merge remote-tracking branch 'scclab/23-input-parameter-files-split'

parents 3d6d34a5 fb869bc7
@@ -16,6 +16,9 @@ def cmdline_args():
# Arguments
p.add_argument("-f", "--sources_file", type=str, default="./sources.yaml",
help="custom sources YAML configuration (default: %(default)s)")
p.add_argument("-s", "--split_by", type=str, default=None,
choices=['year', 'decade'],
help="Period time to split output (default: %(default)s)")
p.add_argument("-v", "--verbosity", type=str, default='ERROR',
choices=['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'],
help="Sets the logging level (default: %(default)s)")
@@ -42,4 +45,4 @@ if __name__ == '__main__':
# Skim output
logging.info("Skimming data to './output' ")
with utils.cd("output"):
- [source.skim() for source in ds.values()]
+ [source.skim(groupby=args.split_by) for source in ds.values()]
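For reference, a minimal standalone sketch (not part of this commit) of what the new option yields after parsing; only the --split_by argument is rebuilt here, and the sample argument lists are illustrative:

import argparse

p = argparse.ArgumentParser()
p.add_argument("-s", "--split_by", type=str, default=None,
               choices=['year', 'decade'],
               help="Time period used to split the output (default: %(default)s)")

print(p.parse_args([]).split_by)                # None -> output is not split
print(p.parse_args(["-s", "decade"]).split_by)  # 'decade' -> forwarded as groupby=args.split_by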
@@ -19,12 +19,12 @@ class Source:
logging.info("Load model '%s'", name)
self._models[name] = Model(variables)
- def skim(self):
+ def skim(self, groupby=None):
for name, model in self._models.items():
dirname = self._name + "_" + name
os.makedirs(dirname, exist_ok=True)
logger.info("Skim data from '%s'", dirname)
- model.skim(dirname)
+ model.skim(dirname, groupby)
class Model:
@@ -38,13 +38,13 @@ class Model:
logger.debug("Load 'vmro3_zm' data")
self.__get_vmro3_zm(**variables)
- def skim(self, dirname):
+ def skim(self, dirname, groupby=None):
if hasattr(self, '_tco3_zm'):
logger.debug("Skim 'tco3_zm' data")
utils.to_netcdf(dirname, "tco3_zm", self._tco3_zm)
utils.to_netcdf(dirname, "tco3_zm", self._tco3_zm, groupby)
if hasattr(self, '_vmro3_zm'):
logger.debug("Skim 'vmro3_zm' data")
utils.to_netcdf(dirname, "vmro3_zm", self._vmro3_zm)
utils.to_netcdf(dirname, "vmro3_zm", self._vmro3_zm, groupby)
@utils.return_on_failure("Error when loading 'tco3_zm'")
def __get_tco3_zm(self, tco3_zm, **kwarg):
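As a side note, existing skim() call sites keep working because the new parameter defaults to None, which the dispatch in to_netcdf below maps to the no-split branch; a tiny illustrative sketch of that dict-with-default pattern (values here are made up):

handlers = {"year": "one file per year", "decade": "one file per decade"}
for groupby in (None, "year", "decade"):
    print(groupby, "->", handlers.get(groupby, "single file"))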
@@ -50,10 +50,33 @@ def create_empty_netCDF(fname):
root_grp.close()
- def to_netcdf(dirname, name, dataset):
+ def to_netcdf(dirname, name, dataset, groupby=None):
"""Creates or appends data to named netcdf files"""
- years, dsx = zip(*dataset.groupby("time.year"))
- fnames = [dirname + "/" + name + "_%s.nc" % y for y in years]
+ def split_by_year(dataset):
+ """Splits a dataset by year"""
+ years, dsx = zip(*dataset.groupby("time.year"))
+ fnames = [dirname + "/" + name + "_%s.nc" % y for y in years]
+ return fnames, dsx
+ def split_by_decade(dataset):
+ """Splits a dataset by decade"""
+ decades = dataset.indexes["time"].year//10*10
+ decades, dsx = zip(*dataset.groupby(xr.DataArray(decades)))
+ fnames = [dirname + "/" + name + "_%s-%s.nc" % (d, d+10) for d in decades]
+ return fnames, dsx
+ def no_split(dataset):
+ """Does not split a dataset"""
+ dsx = (dataset,)
+ fnames = [dirname + "/" + name + ".nc"]
+ return fnames, dsx
+ split_by = {
+ "year": split_by_year,
+ "decade": split_by_decade
+ }
+ fnames, dsx = split_by.get(groupby, no_split)(dataset)
logging.info("Save dataset into: %s", fnames)
[create_empty_netCDF(fn) for fn in fnames if not os.path.isfile(fn)]
xr.save_mfdataset(dsx, fnames, mode='a')
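To illustrate the decade grouping above, a self-contained sketch on synthetic data (variable name and time range are made up); it groups a time series by a decade label the same way split_by_decade does, though it builds the label with DataArray arithmetic instead of the pandas index:

import numpy as np
import pandas as pd
import xarray as xr

time = pd.date_range("1995-01-01", "2014-01-01", freq="YS")
ds = xr.Dataset({"tco3_zm": ("time", np.random.rand(time.size))},
                coords={"time": time})

decade = (ds["time"].dt.year // 10 * 10).rename("decade")   # 1990, ..., 2000, ..., 2010
for label, group in ds.groupby(decade):
    print(label, group.sizes["time"])   # 1990: 5 years, 2000: 10 years, 2010: 5 years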
@@ -39,4 +39,4 @@ def dataset(name, coordinades):
def netcdf(dirname, name, coordinades, **kwarg):
"""Creates or appends data to a mock netcdf file"""
ds = dataset(name, coordinades)
- utils.to_netcdf(dirname, name, ds)
+ utils.to_netcdf(dirname, name, ds, groupby="year")
@@ -106,6 +106,32 @@ class TestO3SKIM_sources(unittest.TestCase):
with utils.cd("output"):
[source.skim() for source in ds.values()]
# CCMI-1 data skim asserts
self.assertTrue(os.path.isdir("output/CCMI-1_IPSL"))
self.assertTrue(os.path.exists("output/CCMI-1_IPSL/tco3_zm.nc"))
self.assertTrue(os.path.exists("output/CCMI-1_IPSL/vmro3_zm.nc"))
# ECMWF data skim asserts
self.assertTrue(os.path.isdir("output/ECMWF_ERA-5"))
self.assertTrue(os.path.exists("output/ECMWF_ERA-5/tco3_zm.nc"))
self.assertTrue(os.path.isdir("output/ECMWF_ERA-i"))
self.assertTrue(os.path.exists("output/ECMWF_ERA-i/tco3_zm.nc"))
self.assertTrue(os.path.exists("output/ECMWF_ERA-i/vmro3_zm.nc"))
# Checks the original data has not been modified
self.assert_with_backup()
# Removes output data for other tests
self.clean_output()
+ def test_003_OutputSplitByYear(self):
+ """Skims the data into the output folder splitting by year"""
+ with utils.cd("data"):
+ ds = {name: sources.Source(name, collection) for
+ name, collection in self.config_base.items()}
+ with utils.cd("output"):
+ [source.skim(groupby="year") for source in ds.values()]
+ # CCMI-1 data skim asserts
+ self.assertTrue(os.path.isdir("output/CCMI-1_IPSL"))
+ self.assertTrue(os.path.exists("output/CCMI-1_IPSL/tco3_zm_2000.nc"))
@@ -123,7 +149,7 @@ class TestO3SKIM_sources(unittest.TestCase):
# Removes output data for other tests
self.clean_output()
- def test_003_SourceErrorDontBreak(self):
+ def test_004_SourceErrorDontBreak(self):
"""The execution does not stop by an error in source"""
with utils.cd("data"):
ds = {name: sources.Source(name, collection) for
@@ -134,7 +160,7 @@ class TestO3SKIM_sources(unittest.TestCase):
# ECMWF data skim asserts
self.assertTrue(os.path.isdir("output/ECMWF_ERA-i"))
self.assertTrue(os.path.exists("output/ECMWF_ERA-i/vmro3_zm_2000.nc"))
self.assertTrue(os.path.exists("output/ECMWF_ERA-i/vmro3_zm.nc"))
# Checks the original data has not been modified
self.assert_with_backup()
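In summary, to_netcdf names its output files as asserted in these tests; a quick sketch reproducing the patterns with the directory and variable used above (the years are illustrative):

dirname, name = "CCMI-1_IPSL", "tco3_zm"
print(dirname + "/" + name + ".nc")                        # groupby=None
print(dirname + "/" + name + "_%s.nc" % 2000)              # groupby="year"
print(dirname + "/" + name + "_%s-%s.nc" % (2000, 2010))   # groupby="decade"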