Commit a146b0ba authored by Klaus Zimmermann

Housekeeping (closes #174)

- Remove obsolete directory "legacy"
- Remove obsolete branch "legacy"
- Order index functions consistently alphabetically
  in `setup.py`, `index_functions.py`, and `__init__.py`
parent 221044ff
@@ -7,6 +7,22 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
## [Unreleased]
### Added
### Changed
### Deprecated
### Removed
- Removed obsolete legacy directory
- Removed obsolete legacy branch
### Fixed
### Security
## [0.12.0] - 2020-02-20
### Added
......
@@ -4,14 +4,14 @@ from .index_functions import ( # noqa: F401
CountLevelCrossings,
CountOccurrences,
DiurnalTemperatureRange,
InterdayDiurnalTemperatureRange,
ExtremeTemperatureRange,
FirstOccurrence,
InterdayDiurnalTemperatureRange,
LastOccurrence,
Percentile,
SpellLength,
Statistics,
ThresholdedPercentile,
ThresholdedStatistics,
ThresholdedPercentile,
TemperatureSum,
)
@@ -95,29 +95,6 @@ class ExtremeTemperatureRange(IndexFunction):
lazy_func = call_func
class InterdayDiurnalTemperatureRange(IndexFunction):
def __init__(self):
super().__init__(units=Unit('degree_Celsius'))
def prepare(self, input_cubes):
props = {(cube.dtype, cube.units, cube.standard_name)
for cube in input_cubes.values()}
assert len(props) == 1
dtype, units, standard_name = props.pop()
assert units in [Unit('Kelvin'), Unit('degree_Celsius')]
super().prepare(input_cubes)
def call_func(self, data, axis, **kwargs):
res = np.absolute(np.diff(data['high_data'] -
data['low_data'], axis=axis)).mean(axis=axis)
return res.astype('float32')
def lazy_func(self, data, axis, **kwargs):
res = da.absolute(da.diff(data['high_data'] -
data['low_data'], axis=axis)).mean(axis=axis)
return res.astype('float32')
class FirstOccurrence(ThresholdMixin, IndexFunction):
def __init__(self, threshold, condition):
super().__init__(threshold, condition, units=Unit('days'))
@@ -156,6 +133,29 @@ class FirstOccurrence(ThresholdMixin, IndexFunction):
return collapsed_cube
class InterdayDiurnalTemperatureRange(IndexFunction):
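# Mean absolute day-to-day difference of the diurnal temperature range,
# i.e. mean(|DTR(day i+1) - DTR(day i)|) over the period (cf. the ETCCDI
# vDTR index).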
def __init__(self):
super().__init__(units=Unit('degree_Celsius'))
def prepare(self, input_cubes):
props = {(cube.dtype, cube.units, cube.standard_name)
for cube in input_cubes.values()}
assert len(props) == 1
dtype, units, standard_name = props.pop()
assert units in [Unit('Kelvin'), Unit('degree_Celsius')]
super().prepare(input_cubes)
def call_func(self, data, axis, **kwargs):
res = np.absolute(np.diff(data['high_data'] -
data['low_data'], axis=axis)).mean(axis=axis)
return res.astype('float32')
def lazy_func(self, data, axis, **kwargs):
res = da.absolute(da.diff(data['high_data'] -
data['low_data'], axis=axis)).mean(axis=axis)
return res.astype('float32')
class LastOccurrence(ThresholdMixin, IndexFunction):
def __init__(self, threshold, condition):
super().__init__(threshold, condition, units=Unit('days'))
@@ -196,6 +196,39 @@ class LastOccurrence(ThresholdMixin, IndexFunction):
return collapsed_cube
class Percentile(IndexFunction):
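# Per-cell percentiles of the input data; `percentiles` is expected to be
# a coordinate whose points lie strictly between 0 and 100.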
def __init__(self, percentiles, interpolation='linear'):
super().__init__(units=Unit('days'))
points = percentiles.points
assert np.all(points > 0)
assert np.all(points < 100)
self.percentiles = percentiles
self.interpolation = interpolation
self.units = '%'
def prepare(self, input_cubes):
super().prepare(input_cubes)
ref_cube = next(iter(input_cubes.values()))
self.standard_name = ref_cube.standard_name
self.units = ref_cube.units
def call_func(self, data, axis, **kwargs):
axis = normalize_axis(axis, data.ndim)
res = np.percentile(data, q=self.percentiles.points, axis=axis,
interpolation=self.interpolation)
return res.astype('float32')
def lazy_func(self, data, axis, **kwargs):
axis = normalize_axis(axis, data.ndim)
def percentile(arr):
return np.percentile(arr,
q=self.percentiles.points,
interpolation=self.interpolation)
res = da.apply_along_axis(percentile, axis=axis, arr=data).squeeze()
return res.astype('float32')
class SpellLength(ThresholdMixin, ReducerMixin, IndexFunction):
def __init__(self, threshold, condition, reducer):
super().__init__(threshold, condition, reducer, units=Unit('days'))
@@ -263,38 +296,15 @@ class Statistics(ReducerMixin, IndexFunction):
return res.astype('float32')
class ThresholdedStatistics(ThresholdMixin, ReducerMixin, IndexFunction):
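# Applies the reducer only to those data points that satisfy the threshold
# condition; all other points are masked out.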
def __init__(self, threshold, condition, reducer):
super().__init__(threshold, condition, reducer, units=Unit('days'))
def prepare(self, input_cubes):
super().prepare(input_cubes)
ref_cube = next(iter(input_cubes.values()))
self.standard_name = ref_cube.standard_name
self.units = ref_cube.units
def call_func(self, data, axis, **kwargs):
axis = normalize_axis(axis, data.ndim)
comb = self.condition(data, self.threshold.points)
res = self.reducer(np.ma.masked_where(~comb, data), axis=axis)
return res.astype('float32')
def lazy_func(self, data, axis, **kwargs):
axis = normalize_axis(axis, data.ndim)
comb = self.lazy_condition(data, self.threshold.points)
res = self.lazy_reducer(da.ma.masked_where(~comb, data), axis=axis)
return res.astype('float32')
class Percentile(IndexFunction):
def __init__(self, percentiles, interpolation='linear'):
super().__init__(units=Unit('days'))
class ThresholdedPercentile(ThresholdMixin, IndexFunction):
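# Like Percentile, but restricted to data points that satisfy the threshold
# condition; cells that are masked anywhere along the axis remain masked in
# the result.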
def __init__(self, threshold, condition,
percentiles, interpolation='linear'):
super().__init__(threshold, condition)
points = percentiles.points
assert np.all(points > 0)
assert np.all(points < 100)
self.percentiles = percentiles
self.interpolation = interpolation
self.units = '%'
def prepare(self, input_cubes):
super().prepare(input_cubes)
@@ -304,30 +314,33 @@ class Percentile(IndexFunction):
def call_func(self, data, axis, **kwargs):
axis = normalize_axis(axis, data.ndim)
res = np.percentile(data, q=self.percentiles, axis=axis,
mask = np.ma.getmaskarray(data).any(axis=axis)
comb = self.condition(data, self.threshold.points)
res = np.percentile(np.ma.masked_where(~comb, data),
q=self.percentiles.points, axis=axis,
interpolation=self.interpolation)
res = np.ma.masked_array(np.ma.getdata(res), mask)
return res.astype('float32')
def lazy_func(self, data, axis, **kwargs):
axis = normalize_axis(axis, data.ndim)
mask = da.ma.getmaskarray(data).any(axis=axis)
comb = self.condition(data, self.threshold.points)
def percentile(arr):
return np.percentile(arr,
q=self.percentiles.points,
interpolation=self.interpolation)
res = da.apply_along_axis(percentile, axis=axis, arr=data).squeeze()
res = da.apply_along_axis(
percentile, axis=axis,
arr=np.ma.masked_where(~comb, data)).squeeze()
res = da.ma.masked_array(da.ma.getdata(res), mask)
return res.astype('float32')
class ThresholdedPercentile(ThresholdMixin, IndexFunction):
def __init__(self, threshold, condition,
percentiles, interpolation='linear'):
super().__init__(threshold, condition)
points = percentiles.points
assert np.all(points > 0)
assert np.all(points < 100)
self.percentiles = percentiles
self.interpolation = interpolation
class ThresholdedStatistics(ThresholdMixin, ReducerMixin, IndexFunction):
def __init__(self, threshold, condition, reducer):
super().__init__(threshold, condition, reducer, units=Unit('days'))
def prepare(self, input_cubes):
super().prepare(input_cubes)
@@ -337,27 +350,14 @@ class ThresholdedPercentile(ThresholdMixin, IndexFunction):
def call_func(self, data, axis, **kwargs):
axis = normalize_axis(axis, data.ndim)
mask = np.ma.getmaskarray(data).any(axis=axis)
comb = self.condition(data, self.threshold.points)
res = np.percentile(np.ma.masked_where(~comb, data),
q=self.percentiles.points, axis=axis,
interpolation=self.interpolation)
res = np.ma.masked_array(da.ma.getdata(res), mask)
res = self.reducer(np.ma.masked_where(~comb, data), axis=axis)
return res.astype('float32')
def lazy_func(self, data, axis, **kwargs):
axis = normalize_axis(axis, data.ndim)
mask = da.ma.getmaskarray(data).any(axis=axis)
comb = self.condition(data, self.threshold.points)
def percentile(arr):
return np.percentile(arr,
q=self.percentiles.points,
interpolation=self.interpolation)
res = da.apply_along_axis(
percentile, axis=axis,
arr=np.ma.masked_where(~comb, data)).squeeze()
res = da.ma.masked_array(da.ma.getdata(res), mask)
comb = self.lazy_condition(data, self.threshold.points)
res = self.lazy_reducer(da.ma.masked_where(~comb, data), axis=axis)
return res.astype('float32')
......
import numpy as np
import iris
import iris.coords
import iris.coord_categorisation
from cf_units import Unit
# from change_pr_units import change_pr_units
# import pprint
# import check_cubes
var = 'pr'
indir = '/home/rossby/imports/obs/UERRA/SURFEX-MESCAN/input/mon/'
inroot = '_EUR-055_UERRA-HARMONIE_RegRean_v1d1-v1d2_SURFEX-MESCAN_v1_mon_*.nc'
filename = '{}{}{}'.format(indir, var, inroot)
cubelist = iris.load(filename)
creation_date = ''
tracking_id = ''
for cube in cubelist:
creation_date += cube.attributes.pop('creation_date', '') + ' '
tracking_id += cube.attributes.pop('tracking_id', '') + ' '
# pprint.pprint(check_cubes(.....))
datacube = cubelist.concatenate()
if len(datacube) > 1:
raise TypeError('concatenation resulted in more than one cube')
else:
datacube = datacube[0]
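# Convert the monthly mean precipitation flux (assuming the usual CF units
# of kg m-2 s-1 for pr) to a monthly total in mm: multiply by the length of
# each month in seconds; 1 kg m-2 of water corresponds to 1 mm depth.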
f = np.diff(datacube.coord('time').bounds) * 24 * 60 * 60
datacube *= f[:, :, np.newaxis]
datacube.units = Unit('mm')
datacube.standard_name = 'lwe_thickness_of_precipitation_amount'
datacube.attributes['creation_date'] = creation_date
datacube.attributes['tracking_id'] = tracking_id
iris.coord_categorisation.add_month_number(datacube, 'time', name='month')
iris.coord_categorisation.add_year(datacube, 'time', name='year')
Precip_sum_ANN = datacube.aggregated_by('year', iris.analysis.SUM)
Precip_sum_ANN.long_name = u'Annual Total Precipitation'
Precip_sum_ANN.var_name = u'Precip_sum_ANN'
Precip_sum_ANN.remove_coord('month')
c = Precip_sum_ANN.extract(iris.Constraint(
coord_values={'year': lambda cell: 1960 < cell < 1991}))
Precip_sum_ANN_6190 = c.collapsed('year', iris.analysis.MEAN)
c = datacube.extract(iris.Constraint(
coord_values={u'month': lambda cell: 3 < cell < 11}))
Precip_sum_AMJJASO = c.aggregated_by('year', iris.analysis.SUM)
Precip_sum_AMJJASO.long_name = u'April-October Total Precipitation'
Precip_sum_AMJJASO.var_name = u'Precip_sum_AMJJASO'
Precip_sum_AMJJASO.remove_coord('month')
e = Precip_sum_AMJJASO.extract(iris.Constraint(
coord_values={'year': lambda cell: 1960 < cell < 1991}))
Precip_sum_AMJJASO_6190 = e.collapsed('year', iris.analysis.MEAN)
outdir = '/home/sm_lbarr/PROJ/B4EST/'
outroot = '_EUR-055_UERRA-HARMONIE_RegRean_v1d1-v1d2_SURFEX-MESCAN_v1_ann_1961-2015.nc'
iris.save(Precip_sum_ANN,
'{}{}{}'.format(outdir, 'Precip_sum_ANN', outroot))
iris.save(Precip_sum_AMJJASO,
'{}{}{}'.format(outdir, 'Precip_sum_AMJJASO', outroot))
iris.save(Precip_sum_ANN_6190,
'{}{}{}'.format(outdir, 'Precip_sum_ANN_6190', outroot))
iris.save(Precip_sum_AMJJASO_6190,
'{}{}{}'.format(outdir, 'Precip_sum_AMJJASO_6190', outroot))
# import pprint
import iris
import iris.coords
import iris.coord_categorisation
from cf_units import Unit
# import check_cubes
var = 'tas'
indir = '/home/rossby/imports/obs/UERRA/SURFEX-MESCAN/input/mon/'
inroot = '_EUR-055_UERRA-HARMONIE_RegRean_v1d1-v1d2_SURFEX-MESCAN_v1_mon_*.nc'
filename = '{}{}{}'.format(indir, var, inroot)
cubelist = iris.load(filename)
creation_date = ''
tracking_id = ''
for cube in cubelist:
creation_date += cube.attributes.pop('creation_date', '') + ' '
tracking_id += cube.attributes.pop('tracking_id', '') + ' '
# pprint.pprint(check_cubes(.....))
datacube = cubelist.concatenate()
if len(datacube) > 1:
raise TypeError('concatenation resulted in more than one cube')
else:
datacube = datacube[0]
datacube.convert_units(Unit('degree_Celsius'))
datacube.attributes['creation_date'] = creation_date
datacube.attributes['tracking_id'] = tracking_id
iris.coord_categorisation.add_month(datacube, 'time', name='month')
iris.coord_categorisation.add_year(datacube, 'time', name='year')
Tmean_ANN = datacube.aggregated_by('year', iris.analysis.MEAN)
Tmean_ANN.long_name = u'Annual Mean Near-Surface Air Temperature'
Tmean_ANN.var_name = u'Tmean_ANN'
Tmean_ANN.remove_coord('month')
c = Tmean_ANN.extract(iris.Constraint(
coord_values={'year': lambda cell: 1960 < cell < 1991}))
Tmean_ANN_6190 = c.collapsed('year', iris.analysis.MEAN)
Tmean_JAN = datacube.extract(iris.Constraint(coord_values={u'month': 'Jan'}))
Tmean_JAN.long_name = u'January Mean Near-Surface Air Temperature'
Tmean_JAN.var_name = u'Tmean_JAN'
Tmean_JAN.remove_coord('month')
c = Tmean_JAN.extract(iris.Constraint(
coord_values={'year': lambda cell: 1960 < cell < 1991}))
Tmean_JAN_6190 = c.collapsed('year', iris.analysis.MEAN)
Tmean_FEB = datacube.extract(iris.Constraint(coord_values={u'month': 'Feb'}))
Tmean_FEB.long_name = u'February Mean Near-Surface Air Temperature'
Tmean_FEB.var_name = u'Tmean_FEB'
Tmean_FEB.remove_coord('month')
c = Tmean_FEB.extract(iris.Constraint(
coord_values={'year': lambda cell: 1960 < cell < 1991}))
Tmean_FEB_6190 = c.collapsed('year', iris.analysis.MEAN)
Tmean_JUL = datacube.extract(iris.Constraint(coord_values={u'month': 'Jul'}))
Tmean_JUL.long_name = u'July Mean Near-Surface Air Temperature'
Tmean_JUL.var_name = u'Tmean_JUL'
Tmean_JUL.remove_coord('month')
c = Tmean_JUL.extract(iris.Constraint(
coord_values={'year': lambda cell: 1960 < cell < 1991}))
Tmean_JUL_6190 = c.collapsed('year', iris.analysis.MEAN)
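# Annual temperature range: difference between the warmest and the coldest
# monthly mean of each year.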
Tmean_max = datacube.aggregated_by('year', iris.analysis.MAX)
Tmean_min = datacube.aggregated_by('year', iris.analysis.MIN)
Tmean_RANGE = Tmean_max - Tmean_min
Tmean_RANGE.attributes = Tmean_ANN.attributes
Tmean_RANGE.standard_name = u'air_temperature'
Tmean_RANGE.long_name = u'Near-Surface Air Temperature Annual Range'
Tmean_RANGE.var_name = u'Tmean_RANGE'
Tmean_RANGE.units = Unit('degree_Celsius')
Tmean_RANGE.remove_coord('month')
Tmean_RANGE.cell_methods = (iris.coords.CellMethod(method='mean',
coords='time',
intervals='within month'),
iris.coords.CellMethod(method='range',
coords='time',
intervals='within year'))
c = Tmean_RANGE.extract(iris.Constraint(
coord_values={'year': lambda cell: 1960 < cell < 1991}))
Tmean_RANGE_6190 = c.collapsed('year', iris.analysis.MEAN)
outdir = '/home/sm_lbarr/PROJ/B4EST/'
outroot = '_EUR-055_UERRA-HARMONIE_RegRean_v1d1-v1d2_SURFEX-MESCAN_v1_ann_1961-2015.nc'
iris.save(Tmean_ANN, '{}{}{}'.format(outdir, 'Tmean_ANN', outroot))
iris.save(Tmean_JAN, '{}{}{}'.format(outdir, 'Tmean_JAN', outroot))
iris.save(Tmean_FEB, '{}{}{}'.format(outdir, 'Tmean_FEB', outroot))
iris.save(Tmean_JUL, '{}{}{}'.format(outdir, 'Tmean_JUL', outroot))
iris.save(Tmean_RANGE, '{}{}{}'.format(outdir, 'Tmean_RANGE', outroot))
iris.save(Tmean_ANN_6190, '{}{}{}'.format(outdir, 'Tmean_ANN_6190', outroot))
iris.save(Tmean_JAN_6190, '{}{}{}'.format(outdir, 'Tmean_JAN_6190', outroot))
iris.save(Tmean_FEB_6190, '{}{}{}'.format(outdir, 'Tmean_FEB_6190', outroot))
iris.save(Tmean_JUL_6190, '{}{}{}'.format(outdir, 'Tmean_JUL_6190', outroot))
iris.save(Tmean_RANGE_6190, '{}{}{}'.format(outdir, 'Tmean_RANGE_6190', outroot))
import glob
import numpy as np
import iris
import iris.coords
import iris.coord_categorisation
from iris.analysis import Aggregator
from iris.util import rolling_window
from cf_units import Unit
MISSVAL = 1.0e20
def veg_bounds(indata, axis, condition, start_end):
"""
Args:
* indata (array):
thresholded (indicator) data.
* axis (int):
number of the array dimension mapping the time sequences.
(Can also be negative, e.g. '-1' means last dimension)
* condition (int):
minimum number of consecutive days to start the vegetation period.
* start_end (string):
"start" | "end" indicating which bound
"""
se = start_end.lower()
if se.startswith('s'):
data = indata
s = True
elif se.startswith('e'):
data = np.flip(indata, axis=axis)
s = False
else:
raise RuntimeError("Argument start_end not understood <{}>."
"".format(start_end))
if axis < 0:
# just cope with negative axis numbers
axis += indata.ndim
year_length = indata.shape[-1]
# Make an array with data values "windowed" along the time axis.
hit_windows = rolling_window(data, window=condition, axis=axis)
# Find the windows "full of True-s" (along the added 'window axis').
full_windows = np.all(hit_windows, axis=axis+1)
# Find first occurrence fulfilling the condition (along the time axis).
doy = np.argmax(full_windows, axis=axis)
# mask gridcells where the condition is never met (argmax returns 0 there)
# and those where the bound falls in the second half of the year
ind = tuple(map(np.squeeze,
np.split(np.mgrid[:doy.shape[0], :doy.shape[1]], 2, 0))) + (doy,)
mask = (~full_windows[ind]) | (doy > np.ceil(year_length / 2))
masked_doy = np.ma.masked_array(np.float32(doy), mask)
# convert to day of year in the interval 1..year_length
# for end calculations count from end of year
if s:
masked_doy = masked_doy + 1
else:
masked_doy = year_length - masked_doy
return masked_doy
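# Hypothetical 1-d example: with condition=4 and start_end='start',
# indata = [F, F, T, T, T, T, F, ...] has its first run of four consecutive
# True values starting at index 2, so veg_bounds returns day of year 3;
# with start_end='end' the data are flipped first and the result is counted
# from the end of the year instead.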
def veg_gdd(data, start, end):
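# Accumulate growing degree-days over the vegetation period: zero out days
# outside [start, end], subtract the 5 degC base temperature (clamping at
# zero), and sum over time.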
doy = np.arange(data.shape[0]) + 1
dayix = doy[:, None, None]
L = (dayix < start[None, :, :]) | (dayix > end[None, :, :])
data.data[L] = 0.0
data.data = np.maximum(data.data - 5, 0.0)
gdd = data.collapsed('time', iris.analysis.SUM)
return gdd, data
def main():
# Make an aggregator from the user function.
VEG_BOUNDS = Aggregator('veg_bounds',
veg_bounds,
units_func=lambda units: 1)
# Define the parameters of the index.
threshold_temperature = 5.0
consecutive_days = 4
var = 'tas'
indir = '/home/rossby/imports/obs/UERRA/SURFEX-MESCAN/input/day/'
inroot = '_EUR-055_UERRA-HARMONIE_RegRean_v1d1-v1d2_SURFEX-MESCAN_v1_day_*.nc' # noqa
filepattern = '{}{}{}'.format(indir, var, inroot)
filelist = sorted(glob.glob(filepattern))
creation_date = ''
tracking_id = ''
vegStart = iris.cube.CubeList([])
vegEnd = iris.cube.CubeList([])
vegLen = iris.cube.CubeList([])
vegGDD = iris.cube.CubeList([])
for f in filelist: # one file / year at a time ...
datacube = iris.load_cube(f)
creation_date += datacube.attributes.pop('creation_date', '') + ' '
tracking_id += datacube.attributes.pop('tracking_id', '') + ' '
datacube.convert_units(Unit('degC'))
iris.coord_categorisation.add_day_of_year(datacube, 'time', name='doy')
iris.coord_categorisation.add_year(datacube, 'time', name='year')
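# Boolean indicator cube: True where the daily mean temperature exceeds
# the 5 degC threshold.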
gtThreshold = datacube.copy()
gtThreshold.data = gtThreshold.data > threshold_temperature
Vstart = gtThreshold.collapsed('time', VEG_BOUNDS,
condition=consecutive_days,
start_end='start')
mask = np.ma.getmaskarray(Vstart.data)
# iris.util.mask_cube(Vstart, mask)
Vend = gtThreshold.collapsed('time', VEG_BOUNDS,
condition=consecutive_days,
start_end='end')
mask = mask | np.ma.getmaskarray(Vend.data)
Vend.data = np.ma.masked_array(Vend.data, mask)
# iris.util.mask_cube(Vend, mask)
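# Length of the vegetation period in days, inclusive of both bounds.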
Vlen = Vend - Vstart + 1
Vlen.data[mask] = 0
(Vgdd, testgdd) = veg_gdd(datacube.copy(), Vstart.data, Vend.data)
Vgdd.data[mask] = 0
Vstart.remove_coord('doy')
Vend.remove_coord('doy')
Vlen.remove_coord('doy')
Vgdd.remove_coord('doy')
Vstart.remove_coord('year')
Vend.remove_coord('year')
Vlen.remove_coord('year')
Vgdd.remove_coord('year')
vegStart.append(Vstart.copy())
vegEnd.append(Vend.copy())
vegLen.append(Vlen.copy())
vegGDD.append(Vgdd.copy())
dayVegStart5_ANN = vegStart.merge_cube()
dayVegStart5_ANN.units = Unit('days')
dayVegStart5_ANN._FillValue = MISSVAL
dayVegStart5_ANN.valid_min = 1.
dayVegStart5_ANN.valid_max = 183.
dayVegStart5_ANN.standard_name = None
dayVegStart5_ANN.long_name = u'Start of Vegetation Period (first 4 days above 5 degC)' # noqa
dayVegStart5_ANN.var_name = u'dayVegStart5_ANN'
dayVegStart5_ANN.attributes = datacube.attributes
dayVegStart5_ANN.attributes['creation_date'] = creation_date
dayVegStart5_ANN.attributes['tracking_id'] = tracking_id
dayVegEnd5_ANN = vegEnd.merge_cube()
dayVegEnd5_ANN.units = Unit('days')
dayVegEnd5_ANN._FillValue = MISSVAL
dayVegEnd5_ANN.valid_min = 183.
dayVegEnd5_ANN.valid_max = 366.
dayVegEnd5_ANN.standard_name = None
dayVegEnd5_ANN.long_name = u'End of Vegetation Period (last 4 days above 5 degC)' # noqa