index_functions.py 14.9 KB
Newer Older
1
2
# -*- coding: utf-8 -*-

3
4
from datetime import datetime

5
from cf_units import Unit
6
import dask.array as da
7
8
import numpy as np

9
10
11
from .support import (normalize_axis,
                      IndexFunction,
                      ThresholdMixin, ReducerMixin)
12
13
14
15
16
17
18
19
20
21
22
from ..util import change_units


class CountLevelCrossings(IndexFunction):
    def __init__(self, threshold):
        super().__init__(units=Unit('days'))
        self.threshold = threshold
        self.extra_coords.append(threshold.copy())

    def prepare(self, input_cubes):
        props = {(cube.dtype, cube.units, cube.standard_name)
23
                 for cube in input_cubes.values()}
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
        assert len(props) == 1
        dtype, units, standard_name = props.pop()
        threshold = self.threshold
        threshold.points = threshold.points.astype(dtype)
        if threshold.has_bounds():
            threshold.bounds = threshold.bounds.astype(dtype)
        change_units(threshold, units, standard_name)
        super().prepare(input_cubes)

    def call_func(self, data, axis, **kwargs):
        cond = da.logical_and(data['low_data'] < self.threshold.points,
                              self.threshold.points < data['high_data'])
        res = np.count_nonzero(cond, axis=axis)
        return res.astype('float32')

    lazy_func = call_func
40

41
42
43
44

class CountOccurrences(ThresholdMixin, IndexFunction):
    def __init__(self, threshold, condition):
        super().__init__(threshold, condition, units=Unit('days'))
45

46
47
    def call_func(self, data, axis, **kwargs):
        axis = normalize_axis(axis, data.ndim)
48
        cond = self.condition(data, self.threshold.points)
49
50
51
52
53
        res = np.count_nonzero(cond, axis=axis)
        return res.astype('float32')

    def lazy_func(self, data, axis, **kwargs):
        axis = normalize_axis(axis, data.ndim)
54
        cond = self.lazy_condition(data, self.threshold.points)
55
56
        res = da.count_nonzero(cond, axis=axis)
        return res.astype('float32')
57
58


59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
class DiurnalTemperatureRange(IndexFunction):
    def __init__(self):
        super().__init__(units=Unit('degree_Celsius'))

    def prepare(self, input_cubes):
        props = {(cube.dtype, cube.units, cube.standard_name)
                 for cube in input_cubes.values()}
        assert len(props) == 1
        dtype, units, standard_name = props.pop()
        assert units in [Unit('Kelvin'), Unit('degree_Celsius')]
        super().prepare(input_cubes)

    def call_func(self, data, axis, **kwargs):
        res = (data['high_data'] - data['low_data']).mean(axis=axis)
        return res.astype('float32')

    lazy_func = call_func


78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
class ExtremeTemperatureRange(IndexFunction):
    def __init__(self):
        super().__init__(units=Unit('degree_Celsius'))

    def prepare(self, input_cubes):
        props = {(cube.dtype, cube.units, cube.standard_name)
                 for cube in input_cubes.values()}
        assert len(props) == 1
        dtype, units, standard_name = props.pop()
        assert (units == Unit('Kelvin')) or (units == Unit('degree_Celsius'))
        super().prepare(input_cubes)

    def call_func(self, data, axis, **kwargs):
        res = (data['high_data'].max(axis=axis) -
               data['low_data'].min(axis=axis))
        return res.astype('float32')

    lazy_func = call_func


98
class FirstOccurrence(ThresholdMixin, IndexFunction):
99
    def __init__(self, threshold, condition):
100
        super().__init__(threshold, condition, units=Unit('days'))
101
102
103
104
        self.NO_OCCURRENCE = np.inf

    def call_func(self, data, axis, **kwargs):
        axis = normalize_axis(axis, data.ndim)
105
        cond = self.condition(data, self.threshold.points)
106
107
108
109
110
111
112
113
        res = np.ma.where(cond.any(axis=axis),
                          cond.argmax(axis=axis),
                          self.NO_OCCURRENCE)
        return res.astype('float32')

    def lazy_func(self, data, axis, **kwargs):
        axis = normalize_axis(axis, data.ndim)
        mask = da.ma.getmaskarray(data).any(axis=axis)
114
        cond = self.lazy_condition(data, self.threshold.points)
115
116
117
118
119
120
        res = da.where(cond.any(axis=axis),
                       cond.argmax(axis=axis),
                       self.NO_OCCURRENCE)
        res = da.ma.masked_array(da.ma.getdata(res), mask)
        return res.astype('float32')

121
122
123
124
125
126
127
128
    def post_process(self, collapsed_cube, data_result, coords,
                     period, **kwargs):
        time = collapsed_cube.coord('time')
        calendar = time.units.calendar
        offsets = np.empty_like(time.points, dtype=data_result.dtype)
        for i, representative_date in enumerate(time.cells()):
            year = representative_date.point.year
            start_date = datetime(year, period.first_month_number, 1)
129
            units = Unit(f'days since {year}-01-01', calendar=calendar)
130
131
132
133
134
135
            offsets[i] = units.date2num(start_date)
        collapsed_cube.data = (collapsed_cube.core_data()
                               + offsets[:, None, None])
        return collapsed_cube


Klaus Zimmermann's avatar
Klaus Zimmermann committed
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
class InterdayDiurnalTemperatureRange(IndexFunction):
    def __init__(self):
        super().__init__(units=Unit('degree_Celsius'))

    def prepare(self, input_cubes):
        props = {(cube.dtype, cube.units, cube.standard_name)
                 for cube in input_cubes.values()}
        assert len(props) == 1
        dtype, units, standard_name = props.pop()
        assert units in [Unit('Kelvin'), Unit('degree_Celsius')]
        super().prepare(input_cubes)

    def call_func(self, data, axis, **kwargs):
        res = np.absolute(np.diff(data['high_data'] -
                                  data['low_data'], axis=axis)).mean(axis=axis)
        return res.astype('float32')

    def lazy_func(self, data, axis, **kwargs):
        res = da.absolute(da.diff(data['high_data'] -
                                  data['low_data'], axis=axis)).mean(axis=axis)
        return res.astype('float32')


159
class LastOccurrence(ThresholdMixin, IndexFunction):
160
    def __init__(self, threshold, condition):
161
        super().__init__(threshold, condition, units=Unit('days'))
162
163
164
165
        self.NO_OCCURRENCE = -np.inf

    def call_func(self, data, axis, **kwargs):
        axis = normalize_axis(axis, data.ndim)
166
        cond = self.condition(np.flip(data, axis=axis), self.threshold.points)
167
168
169
170
171
172
173
174
175
        ndays = data.shape[axis]
        res = np.ma.where(cond.any(axis=axis),
                          ndays - cond.argmax(axis=axis),
                          self.NO_OCCURRENCE)
        return res.astype('float32')

    def lazy_func(self, data, axis, **kwargs):
        axis = normalize_axis(axis, data.ndim)
        mask = da.ma.getmaskarray(data).any(axis=axis)
176
        cond = self.lazy_condition(da.flip(data, axis), self.threshold.points)
177
178
179
180
181
182
183
        ndays = data.shape[axis]
        res = da.where(cond.any(axis=axis),
                       ndays - cond.argmax(axis=axis),
                       self.NO_OCCURRENCE)
        res = da.ma.masked_array(da.ma.getdata(res), mask)
        return res.astype('float32')

184
185
186
187
188
189
190
191
    def post_process(self, collapsed_cube, data_result, coords,
                     period, **kwargs):
        time = collapsed_cube.coord('time')
        calendar = time.units.calendar
        offsets = np.empty_like(time.points, dtype=data_result.dtype)
        for i, representative_date in enumerate(time.cells()):
            year = representative_date.point.year
            start_date = datetime(year, period.first_month_number, 1)
192
            units = Unit(f'days since {year}-01-01', calendar=calendar)
193
194
195
196
197
198
            offsets[i] = units.date2num(start_date)
        collapsed_cube.data = (collapsed_cube.core_data()
                               + offsets[:, None, None])
        return collapsed_cube


Klaus Zimmermann's avatar
Klaus Zimmermann committed
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
class Percentile(IndexFunction):
    def __init__(self, percentiles, interpolation='linear'):
        super().__init__(units=Unit('days'))
        points = percentiles.points
        assert np.all(points > 0)
        assert np.all(points < 100)
        self.percentiles = percentiles
        self.interpolation = interpolation
        self.units = '%'

    def prepare(self, input_cubes):
        super().prepare(input_cubes)
        ref_cube = next(iter(input_cubes.values()))
        self.standard_name = ref_cube.standard_name
        self.units = ref_cube.units

    def call_func(self, data, axis, **kwargs):
        axis = normalize_axis(axis, data.ndim)
        res = np.percentile(data, q=self.percentiles, axis=axis,
                            interpolation=self.interpolation)
        return res.astype('float32')

    def lazy_func(self, data, axis, **kwargs):
        axis = normalize_axis(axis, data.ndim)

        def percentile(arr):
            return np.percentile(arr,
                                 q=self.percentiles.points,
                                 interpolation=self.interpolation)
        res = da.apply_along_axis(percentile, axis=axis, arr=data).squeeze()
        return res.astype('float32')


232
class SpellLength(ThresholdMixin, ReducerMixin, IndexFunction):
233
    def __init__(self, threshold, condition, reducer):
234
        super().__init__(threshold, condition, reducer, units=Unit('days'))
235

236
237
    def call_func(self, data, axis, **kwargs):
        axis = normalize_axis(axis, data.ndim)
238
239
240
241
        mask = np.ma.getmaskarray(data).any(axis=axis)
        res = np.apply_along_axis(self, axis=axis, arr=data)
        res = np.ma.masked_array(np.ma.getdata(res), mask)
        return res.astype('float32')
242
243
244

    def lazy_func(self, data, axis, **kwargs):
        axis = normalize_axis(axis, data.ndim)
245
246
247
248
        mask = da.ma.getmaskarray(data).any(axis=axis)
        res = da.apply_along_axis(self, axis=axis, arr=data)
        res = da.ma.masked_array(da.ma.getdata(res), mask)
        return res.astype('float32')
249

250
    def __call__(self, raw_data):
251
        data = self.condition(raw_data, self.threshold.points)
252
253
254
255
256
257
258
        where = np.flatnonzero
        x = data[1:] != data[:-1]
        starts = where(x) + 1
        n = len(x)
        no_runs = len(starts)
        if no_runs == 0:
            res = n if data[0] else 0
259
        else:
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
            candidates = []
            if data[0]:
                candidates.append(starts[0])
            if data[starts[-1]]:
                candidates.append(n - starts[-1])
            if no_runs > 1:
                lengths = np.diff(starts)
                values = data[starts[:-1]]
                if np.any(values):
                    lmax = self.reducer(lengths[values])
                    candidates.append(lmax)
            if len(candidates) == 0:
                res = 0
            else:
                res = self.reducer(candidates)
        return float(res)
276
277


278
class Statistics(ReducerMixin, IndexFunction):
279
    def __init__(self, reducer):
280
        super().__init__(reducer)
281

282
283
    def prepare(self, input_cubes):
        super().prepare(input_cubes)
284
285
286
        ref_cube = next(iter(input_cubes.values()))
        self.standard_name = ref_cube.standard_name
        self.units = ref_cube.units
287
288
289
290
291
292
293
294
295
296
297
298

    def call_func(self, data, axis, **kwargs):
        axis = normalize_axis(axis, data.ndim)
        res = self.reducer(data, axis=axis)
        return res.astype('float32')

    def lazy_func(self, data, axis, **kwargs):
        axis = normalize_axis(axis, data.ndim)
        res = self.lazy_reducer(data, axis=axis)
        return res.astype('float32')


Klaus Zimmermann's avatar
Klaus Zimmermann committed
299
300
301
302
class ThresholdedPercentile(ThresholdMixin, IndexFunction):
    def __init__(self, threshold, condition,
                 percentiles, interpolation='linear'):
        super().__init__(threshold, condition)
303
304
305
306
307
308
309
310
311
312
313
314
315
316
        points = percentiles.points
        assert np.all(points > 0)
        assert np.all(points < 100)
        self.percentiles = percentiles
        self.interpolation = interpolation

    def prepare(self, input_cubes):
        super().prepare(input_cubes)
        ref_cube = next(iter(input_cubes.values()))
        self.standard_name = ref_cube.standard_name
        self.units = ref_cube.units

    def call_func(self, data, axis, **kwargs):
        axis = normalize_axis(axis, data.ndim)
Klaus Zimmermann's avatar
Klaus Zimmermann committed
317
318
319
320
        mask = np.ma.getmaskarray(data).any(axis=axis)
        comb = self.condition(data, self.threshold.points)
        res = np.percentile(np.ma.masked_where(~comb, data),
                            q=self.percentiles.points, axis=axis,
321
                            interpolation=self.interpolation)
Klaus Zimmermann's avatar
Klaus Zimmermann committed
322
        res = np.ma.masked_array(da.ma.getdata(res), mask)
323
324
325
326
        return res.astype('float32')

    def lazy_func(self, data, axis, **kwargs):
        axis = normalize_axis(axis, data.ndim)
Klaus Zimmermann's avatar
Klaus Zimmermann committed
327
328
        mask = da.ma.getmaskarray(data).any(axis=axis)
        comb = self.condition(data, self.threshold.points)
329
330
331
332
333

        def percentile(arr):
            return np.percentile(arr,
                                 q=self.percentiles.points,
                                 interpolation=self.interpolation)
Klaus Zimmermann's avatar
Klaus Zimmermann committed
334
335
336
337
        res = da.apply_along_axis(
            percentile, axis=axis,
            arr=np.ma.masked_where(~comb, data)).squeeze()
        res = da.ma.masked_array(da.ma.getdata(res), mask)
338
339
340
        return res.astype('float32')


Klaus Zimmermann's avatar
Klaus Zimmermann committed
341
342
343
class ThresholdedStatistics(ThresholdMixin, ReducerMixin, IndexFunction):
    def __init__(self, threshold, condition, reducer):
        super().__init__(threshold, condition, reducer, units=Unit('days'))
344
345
346
347
348
349
350
351
352
353

    def prepare(self, input_cubes):
        super().prepare(input_cubes)
        ref_cube = next(iter(input_cubes.values()))
        self.standard_name = ref_cube.standard_name
        self.units = ref_cube.units

    def call_func(self, data, axis, **kwargs):
        axis = normalize_axis(axis, data.ndim)
        comb = self.condition(data, self.threshold.points)
Klaus Zimmermann's avatar
Klaus Zimmermann committed
354
        res = self.reducer(np.ma.masked_where(~comb, data), axis=axis)
355
356
357
358
        return res.astype('float32')

    def lazy_func(self, data, axis, **kwargs):
        axis = normalize_axis(axis, data.ndim)
Klaus Zimmermann's avatar
Klaus Zimmermann committed
359
360
        comb = self.lazy_condition(data, self.threshold.points)
        res = self.lazy_reducer(da.ma.masked_where(~comb, data), axis=axis)
361
362
363
        return res.astype('float32')


364
class TemperatureSum(ThresholdMixin, IndexFunction):
365
    def __init__(self, threshold, condition):
366
        super().__init__(threshold, condition, units=Unit('days'))
367
368
369
370
371
372
373
        if condition in ['>', '>=']:
            self.fun = lambda d, t: np.maximum(d - t, 0)
            self.lazy_fun = lambda d, t: da.maximum(d - t, 0)
        else:
            self.fun = lambda d, t: np.maximum(t - d, 0)
            self.lazy_fun = lambda d, t: da.maximum(t - d, 0)

374
375
    def prepare(self, input_cubes):
        super().prepare(input_cubes)
376
377
378
        ref_cube = next(iter(input_cubes.values()))
        self.standard_name = ref_cube.standard_name
        if ref_cube.units.is_convertible('degC'):
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
            self.units = 'degC days'
        else:
            raise RuntimeError("Invalid input units")

    def call_func(self, data, axis, **kwargs):
        axis = normalize_axis(axis, data.ndim)
        threshold = self.threshold.points[0]
        res = np.sum(self.fun(data, threshold), axis=axis)
        return res.astype('float32')

    def lazy_func(self, data, axis, **kwargs):
        axis = normalize_axis(axis, data.ndim)
        threshold = self.threshold.points[0]
        res = da.sum(self.lazy_fun(data, threshold), axis=axis)
        return res.astype('float32')