Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
synergy
o3skim
Commits
619b4b8d
Commit
619b4b8d
authored
Oct 26, 2020
by
BorjaEst
Browse files
Merge remote-tracking branch 'scclab/23-input-parameter-files-split'
parents
3d6d34a5
fb869bc7
Changes
5
Hide whitespace changes
Inline
Side-by-side
main
View file @
619b4b8d
...
...
@@ -16,6 +16,9 @@ def cmdline_args():
# Arguments
p
.
add_argument
(
"-f"
,
"--sources_file"
,
type
=
str
,
default
=
"./sources.yaml"
,
help
=
"custom sources YAML configuration (default: %(default)s)"
)
p
.
add_argument
(
"-s"
,
"--split_by"
,
type
=
str
,
default
=
None
,
choices
=
[
'year'
,
'decade'
],
help
=
"Period time to split output (default: %(default)s)"
)
p
.
add_argument
(
"-v"
,
"--verbosity"
,
type
=
str
,
default
=
'ERROR'
,
choices
=
[
'DEBUG'
,
'INFO'
,
'WARNING'
,
'ERROR'
,
'CRITICAL'
],
help
=
"Sets the logging level (default: %(default)s)"
)
...
...
@@ -42,4 +45,4 @@ if __name__ == '__main__':
# Skim output
logging
.
info
(
"Skimming data to './output' "
)
with
utils
.
cd
(
"output"
):
[
source
.
skim
()
for
source
in
ds
.
values
()]
[
source
.
skim
(
groupby
=
args
.
split_by
)
for
source
in
ds
.
values
()]
o3skim/sources.py
View file @
619b4b8d
...
...
@@ -19,12 +19,12 @@ class Source:
logging
.
info
(
"Load model '%s'"
,
name
)
self
.
_models
[
name
]
=
Model
(
variables
)
def
skim
(
self
):
def
skim
(
self
,
groupby
=
None
):
for
name
,
model
in
self
.
_models
.
items
():
dirname
=
self
.
_name
+
"_"
+
name
os
.
makedirs
(
dirname
,
exist_ok
=
True
)
logger
.
info
(
"Skim data from '%s'"
,
dirname
)
model
.
skim
(
dirname
)
model
.
skim
(
dirname
,
groupby
)
class
Model
:
...
...
@@ -38,13 +38,13 @@ class Model:
logger
.
debug
(
"Load 'vmro3_zm' data"
)
self
.
__get_vmro3_zm
(
**
variables
)
def
skim
(
self
,
dirname
):
def
skim
(
self
,
dirname
,
groupby
=
None
):
if
hasattr
(
self
,
'_tco3_zm'
):
logger
.
debug
(
"Skim 'tco3_zm' data"
)
utils
.
to_netcdf
(
dirname
,
"tco3_zm"
,
self
.
_tco3_zm
)
utils
.
to_netcdf
(
dirname
,
"tco3_zm"
,
self
.
_tco3_zm
,
groupby
)
if
hasattr
(
self
,
'_vmro3_zm'
):
logger
.
debug
(
"Skim 'vmro3_zm' data"
)
utils
.
to_netcdf
(
dirname
,
"vmro3_zm"
,
self
.
_vmro3_zm
)
utils
.
to_netcdf
(
dirname
,
"vmro3_zm"
,
self
.
_vmro3_zm
,
groupby
)
@
utils
.
return_on_failure
(
"Error when loading 'tco3_zm'"
)
def
__get_tco3_zm
(
self
,
tco3_zm
,
**
kwarg
):
...
...
o3skim/utils.py
View file @
619b4b8d
...
...
@@ -50,10 +50,33 @@ def create_empty_netCDF(fname):
root_grp
.
close
()
def
to_netcdf
(
dirname
,
name
,
dataset
):
def
to_netcdf
(
dirname
,
name
,
dataset
,
groupby
=
None
):
"""Creates or appends data to named netcdf files"""
years
,
dsx
=
zip
(
*
dataset
.
groupby
(
"time.year"
))
fnames
=
[
dirname
+
"/"
+
name
+
"_%s.nc"
%
y
for
y
in
years
]
def
split_by_year
(
dataset
):
"""Splits a dataset by year"""
years
,
dsx
=
zip
(
*
dataset
.
groupby
(
"time.year"
))
fnames
=
[
dirname
+
"/"
+
name
+
"_%s.nc"
%
y
for
y
in
years
]
return
fnames
,
dsx
def
split_by_decade
(
dataset
):
"""Splits a dataset by decade"""
decades
=
dataset
.
indexes
[
"time"
].
year
//
10
*
10
decades
,
dsx
=
zip
(
*
dataset
.
groupby
(
xr
.
DataArray
(
decades
)))
fnames
=
[
dirname
+
"/"
+
name
+
"_%s-%s.nc"
%
(
d
,
d
+
10
)
for
d
in
decades
]
return
fnames
,
dsx
def
no_split
(
dataset
):
"""Does not split a dataset"""
dsx
=
(
dataset
,)
fnames
=
[
dirname
+
"/"
+
name
+
".nc"
]
return
fnames
,
dsx
split_by
=
{
"year"
:
split_by_year
,
"decade"
:
split_by_decade
}
fnames
,
dsx
=
split_by
.
get
(
groupby
,
no_split
)(
dataset
)
logging
.
info
(
"Save dataset into: %s"
,
fnames
)
[
create_empty_netCDF
(
fn
)
for
fn
in
fnames
if
not
os
.
path
.
isfile
(
fn
)]
xr
.
save_mfdataset
(
dsx
,
fnames
,
mode
=
'a'
)
tests/mockup_data.py
View file @
619b4b8d
...
...
@@ -39,4 +39,4 @@ def dataset(name, coordinades):
def
netcdf
(
dirname
,
name
,
coordinades
,
**
kwarg
):
"""Creates or appends data to a mock netcdf file"""
ds
=
dataset
(
name
,
coordinades
)
utils
.
to_netcdf
(
dirname
,
name
,
ds
)
utils
.
to_netcdf
(
dirname
,
name
,
ds
,
groupby
=
"year"
)
tests/test_o3skim.py
View file @
619b4b8d
...
...
@@ -106,6 +106,32 @@ class TestO3SKIM_sources(unittest.TestCase):
with
utils
.
cd
(
"output"
):
[
source
.
skim
()
for
source
in
ds
.
values
()]
# CCMI-1 data skim asserts
self
.
assertTrue
(
os
.
path
.
isdir
(
"output/CCMI-1_IPSL"
))
self
.
assertTrue
(
os
.
path
.
exists
(
"output/CCMI-1_IPSL/tco3_zm.nc"
))
self
.
assertTrue
(
os
.
path
.
exists
(
"output/CCMI-1_IPSL/vmro3_zm.nc"
))
# ECMWF data skim asserts
self
.
assertTrue
(
os
.
path
.
isdir
(
"output/ECMWF_ERA-5"
))
self
.
assertTrue
(
os
.
path
.
exists
(
"output/ECMWF_ERA-5/tco3_zm.nc"
))
self
.
assertTrue
(
os
.
path
.
isdir
(
"output/ECMWF_ERA-i"
))
self
.
assertTrue
(
os
.
path
.
exists
(
"output/ECMWF_ERA-i/tco3_zm.nc"
))
self
.
assertTrue
(
os
.
path
.
exists
(
"output/ECMWF_ERA-i/vmro3_zm.nc"
))
# Checks the original data has not been modified
self
.
assert_with_backup
()
# Removes output data for other tests
self
.
clean_output
()
def
test_003_OutputSplitByYear
(
self
):
"""Skims the data into the output folder spliting by year"""
with
utils
.
cd
(
"data"
):
ds
=
{
name
:
sources
.
Source
(
name
,
collection
)
for
name
,
collection
in
self
.
config_base
.
items
()}
with
utils
.
cd
(
"output"
):
[
source
.
skim
(
groupby
=
"year"
)
for
source
in
ds
.
values
()]
# CCMI-1 data skim asserts
self
.
assertTrue
(
os
.
path
.
isdir
(
"output/CCMI-1_IPSL"
))
self
.
assertTrue
(
os
.
path
.
exists
(
"output/CCMI-1_IPSL/tco3_zm_2000.nc"
))
...
...
@@ -123,7 +149,7 @@ class TestO3SKIM_sources(unittest.TestCase):
# Removes output data for other tests
self
.
clean_output
()
def
test_00
3
_SourceErrorDontBreak
(
self
):
def
test_00
4
_SourceErrorDontBreak
(
self
):
"""The execution does not stop by an error in source"""
with
utils
.
cd
(
"data"
):
ds
=
{
name
:
sources
.
Source
(
name
,
collection
)
for
...
...
@@ -134,7 +160,7 @@ class TestO3SKIM_sources(unittest.TestCase):
# ECMWF data skim asserts
self
.
assertTrue
(
os
.
path
.
isdir
(
"output/ECMWF_ERA-i"
))
self
.
assertTrue
(
os
.
path
.
exists
(
"output/ECMWF_ERA-i/vmro3_zm
_2000
.nc"
))
self
.
assertTrue
(
os
.
path
.
exists
(
"output/ECMWF_ERA-i/vmro3_zm.nc"
))
# Checks the original data has not been modified
self
.
assert_with_backup
()
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment