Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
synergy
o3skim
Commits
3d6d34a5
Commit
3d6d34a5
authored
Oct 26, 2020
by
BorjaEst
Browse files
Merge remote-tracking branch 'scclab/22-other-nc-files-in-folder-conflict-the-loading'
parents
ac73a291
27662354
Changes
9
Show whitespace changes
Inline
Side-by-side
o3skim/sources.py
View file @
3d6d34a5
...
...
@@ -21,10 +21,10 @@ class Source:
def
skim
(
self
):
for
name
,
model
in
self
.
_models
.
items
():
path
=
self
.
_name
+
"_"
+
name
os
.
makedirs
(
path
,
exist_ok
=
True
)
logger
.
info
(
"Skim data from '%s'"
,
path
)
model
.
skim
(
path
)
dirname
=
self
.
_name
+
"_"
+
name
os
.
makedirs
(
dirname
,
exist_ok
=
True
)
logger
.
info
(
"Skim data from '%s'"
,
dirname
)
model
.
skim
(
dirname
)
class
Model
:
...
...
@@ -38,19 +38,18 @@ class Model:
logger
.
debug
(
"Load 'vmro3_zm' data"
)
self
.
__get_vmro3_zm
(
**
variables
)
def
skim
(
self
,
path
):
def
skim
(
self
,
dirname
):
if
hasattr
(
self
,
'_tco3_zm'
):
logger
.
debug
(
"Skim 'tco3_zm' data"
)
utils
.
to_netcdf
(
path
,
"tco3_zm"
,
self
.
_tco3_zm
)
utils
.
to_netcdf
(
dirname
,
"tco3_zm"
,
self
.
_tco3_zm
)
if
hasattr
(
self
,
'_vmro3_zm'
):
logger
.
debug
(
"Skim 'vmro3_zm' data"
)
utils
.
to_netcdf
(
path
,
"vmro3_zm"
,
self
.
_vmro3_zm
)
utils
.
to_netcdf
(
dirname
,
"vmro3_zm"
,
self
.
_vmro3_zm
)
@
utils
.
return_on_failure
(
"Error when loading 'tco3_zm'"
)
def
__get_tco3_zm
(
self
,
tco3_zm
,
**
kwarg
):
"""Gets and standarises the tco3_zm data"""
fnames
=
glob
.
glob
(
tco3_zm
[
'dir'
]
+
"/*.nc"
)
with
xr
.
open_mfdataset
(
fnames
)
as
dataset
:
with
xr
.
open_mfdataset
(
tco3_zm
[
'paths'
])
as
dataset
:
dataset
=
dataset
.
rename
({
tco3_zm
[
'name'
]:
'tco3_zm'
,
tco3_zm
[
'coordinades'
][
'time'
]:
'time'
,
...
...
@@ -62,8 +61,7 @@ class Model:
@
utils
.
return_on_failure
(
"Error when loading 'vmro3_zm'"
)
def
__get_vmro3_zm
(
self
,
vmro3_zm
,
**
kwarg
):
"""Gets and standarises the vmro3_zm data"""
fnames
=
glob
.
glob
(
vmro3_zm
[
'dir'
]
+
"/*.nc"
)
with
xr
.
open_mfdataset
(
fnames
)
as
dataset
:
with
xr
.
open_mfdataset
(
vmro3_zm
[
'paths'
])
as
dataset
:
dataset
=
dataset
.
rename
({
vmro3_zm
[
'name'
]:
'vmro3_zm'
,
vmro3_zm
[
'coordinades'
][
'time'
]:
'time'
,
...
...
o3skim/utils.py
View file @
3d6d34a5
...
...
@@ -50,10 +50,10 @@ def create_empty_netCDF(fname):
root_grp
.
close
()
def
to_netcdf
(
path
,
name
,
dataset
):
def
to_netcdf
(
dirname
,
name
,
dataset
):
"""Creates or appends data to named netcdf files"""
years
,
dsx
=
zip
(
*
dataset
.
groupby
(
"time.year"
))
fnames
=
[
path
+
"/"
+
name
+
"_%s.nc"
%
y
for
y
in
years
]
fnames
=
[
dirname
+
"/"
+
name
+
"_%s.nc"
%
y
for
y
in
years
]
logging
.
info
(
"Save dataset into: %s"
,
fnames
)
[
create_empty_netCDF
(
fn
)
for
fn
in
fnames
if
not
os
.
path
.
isfile
(
fn
)]
xr
.
save_mfdataset
(
dsx
,
fnames
,
mode
=
'a'
)
sources_example.yaml
View file @
3d6d34a5
...
...
@@ -23,7 +23,7 @@
# expected to have only one folder output named "CCMI-1_IPSL".
#
# This model has 2 variables (tco3_zm and vmro3_zm) whose datasets are
# located in different directories. Therefore the key '
dir
' is different
# located in different directories. Therefore the key '
paths
' is different
# in both of them. Therefore, the output expected at "CCMI-1_IPSL" is
# 2 types of files:
# - tco3_zm_[YEAR].nc: With tco3 skimmed data
...
...
@@ -44,7 +44,7 @@ CCMI-1:
name
:
toz
# Path where to find the netCDF files
# [FIXED_KEY -- MANDATORY]: [CORRECT_VALUE -- MANDATORY]
dir
:
Ccmi/mon/toz
paths
:
Ccmi/mon/toz
/*.nc
# Coordinades description for tco3 data.
# [FIXED_KEY -- MANDATORY]:
coordinades
:
...
...
@@ -62,7 +62,7 @@ CCMI-1:
name
:
vmro3
# Path where to find the netCDF files
# [FIXED_KEY -- MANDATORY]: [CORRECT_VALUE -- MANDATORY]
dir
:
Ccmi/mon/vmro3
paths
:
Ccmi/mon/vmro3
/*_????.nc
# Coordinades description for vmro3 data.
# [FIXED_KEY -- MANDATORY]:
coordinades
:
...
...
@@ -85,7 +85,7 @@ CCMI-1:
#
# This case of ERA-i indeed has 2 variables (tco3_zm and vmro3_zm) but in
# this case, are located inside the same dataset files, therefore the
# key '
dir
' is the same in both variables. The output expected at
# key '
paths
' is the same in both variables. The output expected at
# "ECMWF_ERA-5" is 2 type of files:
# - tco3_zm_[YEAR].nc: With tco3 skimmed data
# - vmro3_zm_[YEAR].nc: With vmro3 skimmed data
...
...
@@ -105,7 +105,7 @@ ECMWF:
name
:
tco3
# Path where to find the netCDF files
# [FIXED_KEY -- MANDATORY]: [CORRECT_VALUE -- MANDATORY]
dir
:
Ecmwf/Era5
paths
:
Ecmwf/Era5
/*.nc
# Coordinades description for tco3 data.
# [FIXED_KEY -- MANDATORY]:
coordinades
:
...
...
@@ -123,7 +123,7 @@ ECMWF:
name
:
toz
# Path where to find the netCDF files
# [FIXED_KEY -- MANDATORY]: [CORRECT_VALUE -- MANDATORY]
dir
:
Ecmwf/Erai
paths
:
Ecmwf/Erai
/*.nc
# Coordinades description for tco3 data.
# [FIXED_KEY -- MANDATORY]:
coordinades
:
...
...
@@ -141,7 +141,7 @@ ECMWF:
name
:
vmro3
# Path where to find the netCDF files
# [FIXED_KEY -- MANDATORY]: [CORRECT_VALUE -- MANDATORY]
dir
:
Ecmwf/Erai
paths
:
Ecmwf/Erai
/*.nc
# Coordinades description for vmro3 data.
# [FIXED_KEY -- MANDATORY]:
coordinades
:
...
...
tests/mockup_data.py
View file @
3d6d34a5
...
...
@@ -2,9 +2,8 @@
import
xarray
as
xr
import
numpy
as
np
import
netCDF4
import
os.path
import
datetime
import
os
from
o3skim
import
utils
...
...
@@ -37,7 +36,7 @@ def dataset(name, coordinades):
)
def
netcdf
(
path
,
name
,
coordinades
,
**
kwarg
):
def
netcdf
(
dirname
,
name
,
coordinades
,
**
kwarg
):
"""Creates or appends data to a mock netcdf file"""
ds
=
dataset
(
name
,
coordinades
)
utils
.
to_netcdf
(
path
,
name
,
ds
)
utils
.
to_netcdf
(
dirname
,
name
,
ds
)
tests/mockup_noise.py
0 → 100644
View file @
3d6d34a5
"""This modules creates mockup data for testing"""
import
xarray
as
xr
import
numpy
as
np
import
datetime
import
random
# First timestamp of the mock time axis.
base = datetime.datetime(2000, 1, 1)

# Axis values shared by every noise dataset built in this module:
# 99 time steps spaced 9 days apart starting at `base`, plus evenly
# stepped integer 'plev', 'lat' and 'lon' axes.
indexes = {
    'time': [base + datetime.timedelta(days=9 * step) for step in range(99)],
    'plev': list(range(1, 1000, 100)),
    'lat': list(range(-90, 90, 10)),
    'lon': list(range(-180, 180, 20)),
}
def data_vars():
    """Build the mock variable description for the module-level axes.

    Returns a 2-tuple ``(dims, data)``: ``dims`` is a tuple with the keys
    of ``indexes`` (in their insertion order) and ``data`` is an array of
    ones with one dimension per axis, each sized like that axis.
    """
    shape = [len(values) for values in indexes.values()]
    dims = tuple(indexes)
    return dims, np.ones(shape)
def data_coord():
    """Return the mock coordinates.

    The module-level ``indexes`` mapping itself is returned (not a copy).
    """
    return indexes
def dataset(name):
    """Create a dataset according to the global module indexes.

    The dataset holds a single variable called ``name`` built from
    ``data_vars()``, with the coordinates given by ``data_coord()``.
    """
    variables = {name: data_vars()}
    return xr.Dataset(variables, coords=data_coord())
def netcdf(path, name, **kwarg):
    """Write a noise dataset for variable ``name`` to the file ``path``.

    The dataset comes from ``dataset(name)`` and is saved with
    ``to_netcdf(path)`` using that method's default arguments.
    Extra keyword arguments are accepted and ignored.
    """
    dataset(name).to_netcdf(path)
tests/noise_files.yaml
0 → 100644
View file @
3d6d34a5
-
name
:
toz
path
:
Ccmi/mon/toz/toz_noise.nc
-
name
:
tco3
path
:
Ecmwf/Era5/tco3_noise.nc
-
name
:
toz
path
:
Ecmwf/Erai/toz_noise.nc
-
name
:
vmro3
path
:
Ecmwf/Erai/vmro3_noise.nc
tests/sources_base.yaml
View file @
3d6d34a5
...
...
@@ -2,14 +2,14 @@ CCMI-1:
IPSL
:
tco3_zm
:
name
:
toz
dir
:
Ccmi/mon/toz
paths
:
Ccmi/mon/toz
/toz_????.nc
coordinades
:
time
:
time
lat
:
lat
lon
:
lon
vmro3_zm
:
name
:
vmro3
dir
:
Ccmi/mon/vmro3
paths
:
Ccmi/mon/vmro3
/*.nc
coordinades
:
time
:
time
plev
:
plev
...
...
@@ -19,7 +19,7 @@ ECMWF:
ERA-5
:
tco3_zm
:
name
:
tco3
dir
:
Ecmwf/Era5
paths
:
Ecmwf/Era5
/tco3_????.nc
coordinades
:
lon
:
longitude
lat
:
latitude
...
...
@@ -27,14 +27,14 @@ ECMWF:
ERA-i
:
tco3_zm
:
name
:
toz
dir
:
Ecmwf/Erai
paths
:
Ecmwf/Erai
/toz_????.nc
coordinades
:
time
:
time
lat
:
latitude
lon
:
longitude
vmro3_zm
:
name
:
vmro3
dir
:
Ecmwf/Erai
paths
:
Ecmwf/Erai
/vmro3_????.nc
coordinades
:
time
:
time
plev
:
level
...
...
tests/sources_err.yaml
View file @
3d6d34a5
...
...
@@ -2,7 +2,7 @@ ECMWF:
ERA-i
:
vmro3_zm
:
# Correct variable
name
:
vmro3
dir
:
Ecmwf/Erai
paths
:
Ecmwf/Erai
/vmro3_????.nc
coordinades
:
time
:
time
plev
:
level
...
...
@@ -10,7 +10,7 @@ ECMWF:
lon
:
longitude
tco3_zm
:
# Incorrect variable
name
:
non_existing_var
dir
:
Ecmwf/Erai
paths
:
Ecmwf/Erai
/toz_????.nc
coordinades
:
time
:
time
lat
:
latitude
...
...
tests/test_o3skim.py
View file @
3d6d34a5
...
...
@@ -10,6 +10,7 @@ import glob
from
o3skim
import
sources
,
utils
# from pyfakefs.fake_filesystem_unittest import TestCase
from
.
import
mockup_data
from
.
import
mockup_noise
class
TestO3SKIM_sources
(
unittest
.
TestCase
):
...
...
@@ -25,22 +26,31 @@ class TestO3SKIM_sources(unittest.TestCase):
self
.
create_mock_datasets
()
self
.
backup_datasets
()
self
.
assert_with_backup
()
self
.
create_noise_datasets
()
def tearDown(self):
    """Tear down test fixtures, if any."""
    # Intentionally empty: this method performs no clean-up of its own.
def
create_mock_datasets
(
self
):
"""Creates mock data files according to the loaded configuration"""
with
utils
.
cd
(
'data'
):
for
_
,
collection
in
self
.
config_base
.
items
():
for
_
,
variables
in
collection
.
items
():
for
_
,
vinfo
in
variables
.
items
():
path
=
"data/"
+
vinfo
[
"dir"
]
os
.
makedirs
(
path
,
exist_ok
=
True
)
mockup_data
.
netcdf
(
path
,
**
vinfo
)
dirname
=
os
.
path
.
dirname
(
vinfo
[
'paths'
])
os
.
makedirs
(
dirname
,
exist_ok
=
True
)
mockup_data
.
netcdf
(
dirname
,
**
vinfo
)
def create_noise_datasets(self):
    """Create the noise data files listed in the noise configuration.

    Loads "tests/noise_files.yaml" through ``utils.load`` and, inside the
    ``utils.cd('data')`` context, calls ``mockup_noise.netcdf`` once per
    entry, passing the entry's keys as keyword arguments.
    """
    # Loaded before entering 'data'; presumably the path is resolved
    # against the directory the tests are launched from -- TODO confirm.
    noise_entries = utils.load("tests/noise_files.yaml")
    with utils.cd('data'):
        for entry in noise_entries:
            mockup_noise.netcdf(**entry)
def
clean_output
(
self
):
"""Cleans output removing all folders at output"""
with
utils
.
cd
(
"
output
"
):
with
utils
.
cd
(
'
output
'
):
directories
=
(
d
for
d
in
os
.
listdir
()
if
os
.
path
.
isdir
(
d
))
for
directory
in
directories
:
shutil
.
rmtree
(
directory
)
...
...
@@ -48,22 +58,22 @@ class TestO3SKIM_sources(unittest.TestCase):
def
backup_datasets
(
self
):
"""Loads the mock datasets into an internal variable"""
self
.
ds_backup
=
{}
with
utils
.
cd
(
'data'
):
for
source
,
collection
in
self
.
config_base
.
items
():
self
.
ds_backup
[
source
]
=
{}
for
model
,
variables
in
collection
.
items
():
self
.
ds_backup
[
source
][
model
]
=
{}
for
v
,
vinfo
in
variables
.
items
():
paths
=
"data/"
+
vinfo
[
"dir"
]
+
"/*.nc"
with
xr
.
open_mfdataset
(
paths
)
as
ds
:
with
xr
.
open_mfdataset
(
vinfo
[
'paths'
])
as
ds
:
self
.
ds_backup
[
source
][
model
][
v
]
=
ds
def
assert_with_backup
(
self
):
"""Asserts the dataset in the backup is equal to the config load"""
with
utils
.
cd
(
'data'
):
for
source
,
collection
in
self
.
config_base
.
items
():
for
model
,
variables
in
collection
.
items
():
for
v
,
vinfo
in
variables
.
items
():
paths
=
"data/"
+
vinfo
[
"dir"
]
+
"/*.nc"
with
xr
.
open_mfdataset
(
paths
)
as
ds
:
with
xr
.
open_mfdataset
(
vinfo
[
'paths'
])
as
ds
:
xr
.
testing
.
assert_identical
(
self
.
ds_backup
[
source
][
model
][
v
],
ds
)
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment