Commit 4d37f894 authored by BorjaEst's avatar BorjaEst
Browse files

First draft to solve __main__ execution and testing

parent ecf727e6
"""O3as package with utilities to handle ozone data skimming."""
from o3skim import operations
import logging
from functools import reduce
import xarray as xr
logger = logging.getLogger('o3skim')
from o3skim import operations
logger = logging.getLogger("o3skim")
def process(dataset, actions):
......@@ -29,12 +32,7 @@ def process(dataset, actions):
:rtype: :class:`xarray.Dataset`
"""
logger.debug("Processing queue: %s", actions)
actions = actions.copy() # Do not edit original
operation = actions.pop()
processed = operations.run(operation, dataset)
if actions != []:
processed = process(processed, actions)
return processed
return reduce(operations.run, actions, dataset)
def save(dataset, target, split_by=None):
......@@ -49,24 +47,37 @@ def save(dataset, target, split_by=None):
:param dataset: DataSet to save in the target.
:type dataset: :class:`xarray.DataSet`
:param target: Location where to save followed by the name prefix.
:param target: Location where to save followed by the name prefix.
:type target: str
:param split_by: Type of saving format to apply.
:type split_by: str, optional
"""
if not split_by:
def path(_): return "{}.nc".format(target)
def path(_):
return "{}.nc".format(target)
groups = [(None, dataset)]
elif split_by == 'year':
def path(y): return "{}_{}-{}.nc".format(target, y, y + 1)
def delta_map(x): return x.year
years = dataset.indexes['time'].map(delta_map)
elif split_by == "year":
def path(y):
return "{}_{}-{}.nc".format(target, y, y + 1)
def delta_map(x):
return x.year
years = dataset.indexes["time"].map(delta_map)
groups = dataset.groupby(xr.DataArray(years))
elif split_by == 'decade':
def path(y): return "{}_{}-{}.nc".format(target, y, y + 10)
def delta_map(x): return x.year // 10 * 10
years = dataset.indexes['time'].map(delta_map)
elif split_by == "decade":
def path(y):
return "{}_{}-{}.nc".format(target, y, y + 10)
def delta_map(x):
return x.year // 10 * 10
years = dataset.indexes["time"].map(delta_map)
groups = dataset.groupby(xr.DataArray(years))
else:
message = "Bad input split_by '{}' use None, 'year' or 'decade'"
......@@ -74,11 +85,13 @@ def save(dataset, target, split_by=None):
years, datasets = tuple(zip(*groups))
try:
xr.save_mfdataset(
mode='a',
mode="a",
datasets=[dataset for dataset in datasets],
paths=[path(year) for year in years])
paths=[path(year) for year in years],
)
except FileNotFoundError:
xr.save_mfdataset(
mode='w',
mode="w",
datasets=[dataset for dataset in datasets],
paths=[path(year) for year in years])
paths=[path(year) for year in years],
)
......@@ -11,7 +11,7 @@ import xarray as xr
# Script logger setup
logger = logging.getLogger("o3norm")
logger = logging.getLogger("o3skim")
def main():
......@@ -31,6 +31,15 @@ def parser() -> None:
description=__doc__,
formatter_class=argparse.RawDescriptionHelpFormatter,
)
# Positional arguments
parser.add_argument(
"paths",
nargs="+",
type=str,
action="store",
help="Paths to netCDF files with the data to skim",
)
# Optional arguments
parser.add_argument(
"-v",
"--verbosity",
......@@ -43,19 +52,17 @@ def parser() -> None:
"-o",
"--output",
type=str,
default=".",
help="Folder for output files (default: %(default)s)",
default="toz-skimmed.nc",
help="Folder for output file (default: %(default)s)",
)
parser.add_argument(
"paths",
nargs="+",
"--mode",
type=str,
action="store",
help="Paths to netCDF files with the data to skim",
default="w",
help="Write (‘w’) or append (‘a’) mode (default: %(default)s)",
)
# Available operations group
operations = parser.add_argument_group("operations")
# Available operations
operations = parser.add_argument_group("available operations")
operations.add_argument(
"--lon_mean",
action="append_const",
......@@ -77,13 +84,14 @@ def parser() -> None:
const="year_mean",
help="Time average accross the year",
)
return parser
def run_command(verbosity, operations, output, paths):
def run_command(paths, operations, **options):
# Set logging level
logging.basicConfig(
level=getattr(logging, verbosity),
level=getattr(logging, options["verbosity"]),
format="%(asctime)s %(name)-24s %(levelname)-8s %(message)s",
)
......@@ -92,15 +100,15 @@ def run_command(verbosity, operations, output, paths):
# Loading of DataArray and attributes
logger.info("Data loading from %s", paths)
dataset = xr.open_mfdataset(paths)
dataset = xr.open_mfdataset(paths, concat_dim="time", combine="nested")
# Processing of skimming operations
logger.info("Data skimming using %s", operations)
skimmed = o3skim.process(dataset, operations)
# Saving
logger.info("Staving result into %s", output)
o3skim.save(skimmed, f"{output}/skimmed")
logger.info("Staving result into %s", options["output"])
skimmed.to_netcdf(options["output"], mode=options["mode"])
# End of program
logger.info("End of program")
......
......@@ -9,19 +9,19 @@ xr.set_options(keep_attrs=True) # Keep attributes
toz_standard_name = "atmosphere_mole_content_of_ozone"
def run(operation, dataset):
def run(dataset, operation):
"""Main entry point for operation call on o3skimming functions:
:lon_mean: Longitudinal mean across the dataset.
:lat_mean: Latitudinal mean across the dataset.
:year_mean: Time coordinate averaged by year.
:param operation: Operation name to perform.
:type operation: str
:param dataset: Original o3 dataset where to perform operations.
:type dataset: :class:`xarray.Dataset`
:param operation: Operation name to perform.
:type operation: str
:return: Dataset after processing the specified operation.
:rtype: :class:`xarray.Dataset`
"""
......
......@@ -2,7 +2,7 @@
import os
import o3mocks
import xarray
import xarray as xr
from pytest import fixture
......@@ -22,5 +22,18 @@ def netCDF_file(request):
@fixture(scope="module")
def dataset(netCDF_file):
return xarray.open_dataset(netCDF_file)
def netCDF_files(netCDF_file):
    """Split the sample dataset into one netCDF file per time step.

    Returns the list of generated file paths.
    NOTE(review): assumes *netCDF_file* ends in '.nc' — the last three
    characters are stripped to build the output file stem; confirm.
    """
    source = xr.open_dataset(netCDF_file)
    stem = netCDF_file[:-3]
    stamps, subsets = zip(*source.groupby("time"))
    targets = [f"{stem}_{stamp}.nc" for stamp in stamps]
    xr.save_mfdataset(subsets, targets)
    return targets
@fixture(scope="module")
def dataset(netCDF_files):
    """Open the per-timestep files as one dataset, concatenated along time."""
    open_options = {"concat_dim": "time", "combine": "nested"}
    return xr.open_mfdataset(paths=netCDF_files, **open_options)
"""Test definitions and groups"""
import xarray as xr
from o3skim import __main__
from pytest import fixture
import o3skim
class Skimmed:
@fixture(scope="class", autouse=True) # Required
@fixture(scope="class", autouse=True) # Needs parametrization
def operation(self, request):
return request.param
@fixture(scope="class", params=[]) # Default None
def extra(self, request):
return request.param
@fixture(scope="class")
def operations(self, extra, operation):
return [operation] + extra
def operations(self, operation):
return [operation]
@fixture(scope="class")
def skimmed(self, dataset, operations):
......
"""Simple test module for testing"""
import subprocess
import cf_xarray as cfxr
import xarray as xr
from pytest import fixture, mark, skip
@fixture(scope="module")
def paths(netCDF_files):
    """Input file paths joined into one space-separated CLI argument string."""
    separator = " "
    return separator.join(netCDF_files)
@fixture(scope="module", params=["toz-skimmed.nc"])
def output(request):
    """Name of the netCDF file the skimming command writes to."""
    target = request.param
    return target
@fixture(scope="module", params=["lon_mean", "lat_mean", "year_mean"])
def operation(request):
    """Skimming operation flag to exercise (parametrized per module run)."""
    selected = request.param
    return selected
@fixture(scope="module", autouse=True)
def process(paths, output, operation):
    """Run the o3skim CLI once per module; returns the completed process.

    NOTE(review): shell=True with an interpolated string — fine for a test
    against trusted fixture paths, but not a pattern for production code.
    """
    cli_call = " ".join(
        ["python -m o3skim", f"-o {output}", f"--{operation}", paths]
    )
    return subprocess.run(cli_call, shell=True)
@fixture(scope="module")
def skimmed(process, output):
    """Dataset produced by the CLI run; skips dependents if the run failed."""
    if process.returncode == 0:
        return xr.open_dataset(output)
    skip("processing failed, no sense to continue")
@fixture(scope="module")
def conventions(skimmed, output):
    """Result of running the CF-conventions checker on the skimmed file."""
    checker_call = "cfchecks {}".format(output)
    return subprocess.run(checker_call, shell=True)
def test_command_success(process):
assert process.returncode == 0
@mark.usefixtures("skimmed")
def test_originals_not_edited(dataset):
    """After skimming, the original dataset must still be loadable (truthy)."""
    assert bool(dataset)
@mark.usefixtures("skimmed")
def test_cf_conventions(conventions):
    """cfchecks must report no CF-convention errors (exit status 0)."""
    status = conventions.returncode
    assert status == 0
"""Simple test module for testing"""
import subprocess
import cf_xarray as cfxr
import numpy as np
from pytest import fixture, mark
from tests.utils import all_perm
def test_original_not_edited(netCDF_file):
pass
def test_cf_conventions(netCDF_file):
    """The source netCDF file must pass the CF-conventions checker."""
    result = subprocess.run(f"cfchecks {netCDF_file}", shell=True)
    assert result.returncode == 0
......@@ -2,71 +2,68 @@
import cf_xarray as cfxr
import numpy as np
from pytest import fixture, mark
from tests.utils import all_perm
from . import Skimmed
class CommonTests(Skimmed):
def test_toz_var_attrs(self, dataset, skimmed, operations):
def test_toz_var_attrs(self, skimmed):
var = skimmed.cf["atmosphere_mole_content_of_ozone"]
assert var.attrs["units"] == "mol m-2"
assert "area: mean" in var.cell_methods
if "year_mean" in operations:
assert "time: mean (interval: 1 years)" in var.cell_methods
@mark.parametrize("operation", ["year_mean"], indirect=True)
@mark.parametrize("extra", all_perm("lat_mean", "lon_mean"), indirect=True)
class TestYearMean(CommonTests):
def test_1date_per_year(self, skimmed):
diff_time = skimmed.time.diff("time").values[:]
assert np.all(diff_time.astype("timedelta64[D]") > np.timedelta64(364, "D"))
assert np.all(diff_time.astype("timedelta64[D]") < np.timedelta64(367, "D"))
@mark.parametrize("variable", ["atmosphere_mole_content_of_ozone"])
def test_dim_reduction(self, dataset, skimmed, operations, variable):
dataset_var = dataset.cf[variable]
skimmed_var = skimmed.cf[variable]
assert dataset_var.ndim == skimmed_var.ndim + len(operations) - 1
@mark.skip(reason="TODO")
def test_time_boundaries(self, skimmed):
pass
pass # TODO
def test_coords_reduction(self, dataset, skimmed):
assert len(dataset.coords) == len(skimmed.coords)
@mark.parametrize("variable", ["atmosphere_mole_content_of_ozone"])
def test_cell_methods(self, skimmed, variable):
var = skimmed.cf[variable]
assert "time: mean (interval: 1 years)" in var.cell_methods
@mark.parametrize("operation", ["lat_mean"], indirect=True)
@mark.parametrize("extra", all_perm("year_mean", "lon_mean"), indirect=True)
class TestLatMean(CommonTests):
def test_no_latitude(self, skimmed):
assert not "Y" in skimmed.cf
@mark.parametrize("variable", ["atmosphere_mole_content_of_ozone"])
def test_dim_reduction(self, dataset, skimmed, operations, variable):
dataset_var = dataset.cf[variable]
skimmed_var = skimmed.cf[variable]
if "year_mean" in operations:
assert dataset_var.ndim == skimmed_var.ndim + len(operations) - 1
else:
assert dataset_var.ndim == skimmed_var.ndim + len(operations)
@mark.skip(reason="TODO")
def test_lat_boundaries(self, skimmed):
pass
pass # TODO
def test_coords_reduction(self, dataset, skimmed):
assert len(dataset.coords) == len(skimmed.coords) + 1
@mark.parametrize("variable", ["atmosphere_mole_content_of_ozone"])
def test_cell_methods(self, skimmed, variable):
var = skimmed.cf[variable]
assert not "lat" in var.cell_methods
@mark.parametrize("operation", ["lon_mean"], indirect=True)
@mark.parametrize("extra", all_perm("lat_mean", "year_mean"), indirect=True)
class TestLonMean(CommonTests):
def test_no_longitude(self, skimmed):
assert not "X" in skimmed.cf
@mark.parametrize("variable", ["atmosphere_mole_content_of_ozone"])
def test_dim_reduction(self, dataset, skimmed, operations, variable):
dataset_var = dataset.cf[variable]
skimmed_var = skimmed.cf[variable]
if "year_mean" in operations:
assert dataset_var.ndim == skimmed_var.ndim + len(operations) - 1
else:
assert dataset_var.ndim == skimmed_var.ndim + len(operations)
@mark.skip(reason="TODO")
def test_lon_boundaries(self, skimmed):
pass
pass # TODO
def test_coords_reduction(self, dataset, skimmed):
assert len(dataset.coords) == len(skimmed.coords) + 1
@mark.parametrize("variable", ["atmosphere_mole_content_of_ozone"])
def test_cell_methods(self, skimmed, variable):
var = skimmed.cf[variable]
assert not "lon" in var.cell_methods
"""Tests utils module"""
import itertools
def all_perm(*args):
    """Return all permutations of every non-empty subset size of *args*.

    For ``n`` arguments this yields permutations of length 1, 2, ..., n,
    each as a list, concatenated into a single flat list.

    :param args: Items to permute.
    :return: List of permutations (each a list), shortest lengths first.

    >>> all_perm("a", "b")
    [['a'], ['b'], ['a', 'b'], ['b', 'a']]
    """
    # Single comprehension replaces the former lambda-assigned helper
    # (PEP 8 E731: do not assign a lambda to a name).
    return [
        list(perm)
        for size in range(1, len(args) + 1)
        for perm in itertools.permutations(args, size)
    ]
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment