index_functions.py 13.3 KB
Newer Older
1
2
# -*- coding: utf-8 -*-

3
4
from datetime import datetime

5
from cf_units import Unit
6
import dask.array as da
7
8
import numpy as np

9
10
11
from .support import (normalize_axis,
                      IndexFunction,
                      ThresholdMixin, ReducerMixin)
12
13
14
15
16
17
18
19
20
21
22
from ..util import change_units


class CountLevelCrossings(IndexFunction):
    def __init__(self, threshold):
        super().__init__(units=Unit('days'))
        self.threshold = threshold
        self.extra_coords.append(threshold.copy())

    def prepare(self, input_cubes):
        props = {(cube.dtype, cube.units, cube.standard_name)
23
                 for cube in input_cubes.values()}
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
        assert len(props) == 1
        dtype, units, standard_name = props.pop()
        threshold = self.threshold
        threshold.points = threshold.points.astype(dtype)
        if threshold.has_bounds():
            threshold.bounds = threshold.bounds.astype(dtype)
        change_units(threshold, units, standard_name)
        super().prepare(input_cubes)

    def call_func(self, data, axis, **kwargs):
        cond = da.logical_and(data['low_data'] < self.threshold.points,
                              self.threshold.points < data['high_data'])
        res = np.count_nonzero(cond, axis=axis)
        return res.astype('float32')

    lazy_func = call_func
40

41
42
43
44

class CountOccurrences(ThresholdMixin, IndexFunction):
    def __init__(self, threshold, condition):
        super().__init__(threshold, condition, units=Unit('days'))
45

46
47
    def call_func(self, data, axis, **kwargs):
        axis = normalize_axis(axis, data.ndim)
48
        cond = self.condition(data, self.threshold.points)
49
50
51
52
53
        res = np.count_nonzero(cond, axis=axis)
        return res.astype('float32')

    def lazy_func(self, data, axis, **kwargs):
        axis = normalize_axis(axis, data.ndim)
54
        cond = self.lazy_condition(data, self.threshold.points)
55
56
        res = da.count_nonzero(cond, axis=axis)
        return res.astype('float32')
57
58


59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
class DiurnalTemperatureRange(IndexFunction):
    def __init__(self):
        super().__init__(units=Unit('degree_Celsius'))

    def prepare(self, input_cubes):
        props = {(cube.dtype, cube.units, cube.standard_name)
                 for cube in input_cubes.values()}
        assert len(props) == 1
        dtype, units, standard_name = props.pop()
        assert units in [Unit('Kelvin'), Unit('degree_Celsius')]
        super().prepare(input_cubes)

    def call_func(self, data, axis, **kwargs):
        res = (data['high_data'] - data['low_data']).mean(axis=axis)
        return res.astype('float32')

    lazy_func = call_func


78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
class ExtremeTemperatureRange(IndexFunction):
    def __init__(self):
        super().__init__(units=Unit('degree_Celsius'))

    def prepare(self, input_cubes):
        props = {(cube.dtype, cube.units, cube.standard_name)
                 for cube in input_cubes.values()}
        assert len(props) == 1
        dtype, units, standard_name = props.pop()
        assert (units == Unit('Kelvin')) or (units == Unit('degree_Celsius'))
        super().prepare(input_cubes)

    def call_func(self, data, axis, **kwargs):
        res = (data['high_data'].max(axis=axis) -
               data['low_data'].min(axis=axis))
        return res.astype('float32')

    lazy_func = call_func


98
class FirstOccurrence(ThresholdMixin, IndexFunction):
99
    def __init__(self, threshold, condition):
100
        super().__init__(threshold, condition, units=Unit('days'))
101
102
103
104
        self.NO_OCCURRENCE = np.inf

    def call_func(self, data, axis, **kwargs):
        axis = normalize_axis(axis, data.ndim)
105
        cond = self.condition(data, self.threshold.points)
106
107
108
109
110
111
112
113
        res = np.ma.where(cond.any(axis=axis),
                          cond.argmax(axis=axis),
                          self.NO_OCCURRENCE)
        return res.astype('float32')

    def lazy_func(self, data, axis, **kwargs):
        axis = normalize_axis(axis, data.ndim)
        mask = da.ma.getmaskarray(data).any(axis=axis)
114
        cond = self.lazy_condition(data, self.threshold.points)
115
116
117
118
119
120
        res = da.where(cond.any(axis=axis),
                       cond.argmax(axis=axis),
                       self.NO_OCCURRENCE)
        res = da.ma.masked_array(da.ma.getdata(res), mask)
        return res.astype('float32')

121
122
123
124
125
126
127
128
    def post_process(self, collapsed_cube, data_result, coords,
                     period, **kwargs):
        time = collapsed_cube.coord('time')
        calendar = time.units.calendar
        offsets = np.empty_like(time.points, dtype=data_result.dtype)
        for i, representative_date in enumerate(time.cells()):
            year = representative_date.point.year
            start_date = datetime(year, period.first_month_number, 1)
129
            units = Unit(f'days since {year}-01-01', calendar=calendar)
130
131
132
133
134
135
            offsets[i] = units.date2num(start_date)
        collapsed_cube.data = (collapsed_cube.core_data()
                               + offsets[:, None, None])
        return collapsed_cube


Klaus Zimmermann's avatar
Klaus Zimmermann committed
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
class InterdayDiurnalTemperatureRange(IndexFunction):
    def __init__(self):
        super().__init__(units=Unit('degree_Celsius'))

    def prepare(self, input_cubes):
        props = {(cube.dtype, cube.units, cube.standard_name)
                 for cube in input_cubes.values()}
        assert len(props) == 1
        dtype, units, standard_name = props.pop()
        assert units in [Unit('Kelvin'), Unit('degree_Celsius')]
        super().prepare(input_cubes)

    def call_func(self, data, axis, **kwargs):
        res = np.absolute(np.diff(data['high_data'] -
                                  data['low_data'], axis=axis)).mean(axis=axis)
        return res.astype('float32')

    def lazy_func(self, data, axis, **kwargs):
        res = da.absolute(da.diff(data['high_data'] -
                                  data['low_data'], axis=axis)).mean(axis=axis)
        return res.astype('float32')


159
class LastOccurrence(ThresholdMixin, IndexFunction):
160
    def __init__(self, threshold, condition):
161
        super().__init__(threshold, condition, units=Unit('days'))
162
163
164
165
        self.NO_OCCURRENCE = -np.inf

    def call_func(self, data, axis, **kwargs):
        axis = normalize_axis(axis, data.ndim)
166
        cond = self.condition(np.flip(data, axis=axis), self.threshold.points)
167
168
169
170
171
172
173
174
175
        ndays = data.shape[axis]
        res = np.ma.where(cond.any(axis=axis),
                          ndays - cond.argmax(axis=axis),
                          self.NO_OCCURRENCE)
        return res.astype('float32')

    def lazy_func(self, data, axis, **kwargs):
        axis = normalize_axis(axis, data.ndim)
        mask = da.ma.getmaskarray(data).any(axis=axis)
176
        cond = self.lazy_condition(da.flip(data, axis), self.threshold.points)
177
178
179
180
181
182
183
        ndays = data.shape[axis]
        res = da.where(cond.any(axis=axis),
                       ndays - cond.argmax(axis=axis),
                       self.NO_OCCURRENCE)
        res = da.ma.masked_array(da.ma.getdata(res), mask)
        return res.astype('float32')

184
185
186
187
188
189
190
191
    def post_process(self, collapsed_cube, data_result, coords,
                     period, **kwargs):
        time = collapsed_cube.coord('time')
        calendar = time.units.calendar
        offsets = np.empty_like(time.points, dtype=data_result.dtype)
        for i, representative_date in enumerate(time.cells()):
            year = representative_date.point.year
            start_date = datetime(year, period.first_month_number, 1)
192
            units = Unit(f'days since {year}-01-01', calendar=calendar)
193
194
195
196
197
198
            offsets[i] = units.date2num(start_date)
        collapsed_cube.data = (collapsed_cube.core_data()
                               + offsets[:, None, None])
        return collapsed_cube


Klaus Zimmermann's avatar
Klaus Zimmermann committed
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
class Percentile(IndexFunction):
    def __init__(self, percentiles, interpolation='linear'):
        super().__init__(units=Unit('days'))
        points = percentiles.points
        assert np.all(points > 0)
        assert np.all(points < 100)
        self.percentiles = percentiles
        self.interpolation = interpolation
        self.units = '%'

    def prepare(self, input_cubes):
        super().prepare(input_cubes)
        ref_cube = next(iter(input_cubes.values()))
        self.standard_name = ref_cube.standard_name
        self.units = ref_cube.units

    def call_func(self, data, axis, **kwargs):
        axis = normalize_axis(axis, data.ndim)
        res = np.percentile(data, q=self.percentiles, axis=axis,
                            interpolation=self.interpolation)
        return res.astype('float32')

    def lazy_func(self, data, axis, **kwargs):
        axis = normalize_axis(axis, data.ndim)

        def percentile(arr):
            return np.percentile(arr,
                                 q=self.percentiles.points,
                                 interpolation=self.interpolation)
        res = da.apply_along_axis(percentile, axis=axis, arr=data).squeeze()
        return res.astype('float32')


232
class Statistics(ReducerMixin, IndexFunction):
233
    def __init__(self, reducer):
234
        super().__init__(reducer)
235

236
237
    def prepare(self, input_cubes):
        super().prepare(input_cubes)
238
239
240
        ref_cube = next(iter(input_cubes.values()))
        self.standard_name = ref_cube.standard_name
        self.units = ref_cube.units
241
242
243
244
245
246
247
248
249
250
251
252

    def call_func(self, data, axis, **kwargs):
        axis = normalize_axis(axis, data.ndim)
        res = self.reducer(data, axis=axis)
        return res.astype('float32')

    def lazy_func(self, data, axis, **kwargs):
        axis = normalize_axis(axis, data.ndim)
        res = self.lazy_reducer(data, axis=axis)
        return res.astype('float32')


Klaus Zimmermann's avatar
Klaus Zimmermann committed
253
254
255
256
class ThresholdedPercentile(ThresholdMixin, IndexFunction):
    def __init__(self, threshold, condition,
                 percentiles, interpolation='linear'):
        super().__init__(threshold, condition)
257
258
259
260
261
262
263
264
265
266
267
268
269
270
        points = percentiles.points
        assert np.all(points > 0)
        assert np.all(points < 100)
        self.percentiles = percentiles
        self.interpolation = interpolation

    def prepare(self, input_cubes):
        super().prepare(input_cubes)
        ref_cube = next(iter(input_cubes.values()))
        self.standard_name = ref_cube.standard_name
        self.units = ref_cube.units

    def call_func(self, data, axis, **kwargs):
        axis = normalize_axis(axis, data.ndim)
Klaus Zimmermann's avatar
Klaus Zimmermann committed
271
272
273
274
        mask = np.ma.getmaskarray(data).any(axis=axis)
        comb = self.condition(data, self.threshold.points)
        res = np.percentile(np.ma.masked_where(~comb, data),
                            q=self.percentiles.points, axis=axis,
275
                            interpolation=self.interpolation)
Klaus Zimmermann's avatar
Klaus Zimmermann committed
276
        res = np.ma.masked_array(da.ma.getdata(res), mask)
277
278
279
280
        return res.astype('float32')

    def lazy_func(self, data, axis, **kwargs):
        axis = normalize_axis(axis, data.ndim)
Klaus Zimmermann's avatar
Klaus Zimmermann committed
281
282
        mask = da.ma.getmaskarray(data).any(axis=axis)
        comb = self.condition(data, self.threshold.points)
283
284
285
286
287

        def percentile(arr):
            return np.percentile(arr,
                                 q=self.percentiles.points,
                                 interpolation=self.interpolation)
Klaus Zimmermann's avatar
Klaus Zimmermann committed
288
289
290
291
        res = da.apply_along_axis(
            percentile, axis=axis,
            arr=np.ma.masked_where(~comb, data)).squeeze()
        res = da.ma.masked_array(da.ma.getdata(res), mask)
292
293
294
        return res.astype('float32')


Klaus Zimmermann's avatar
Klaus Zimmermann committed
295
296
297
class ThresholdedStatistics(ThresholdMixin, ReducerMixin, IndexFunction):
    def __init__(self, threshold, condition, reducer):
        super().__init__(threshold, condition, reducer, units=Unit('days'))
298
299
300
301
302
303
304
305
306
307

    def prepare(self, input_cubes):
        super().prepare(input_cubes)
        ref_cube = next(iter(input_cubes.values()))
        self.standard_name = ref_cube.standard_name
        self.units = ref_cube.units

    def call_func(self, data, axis, **kwargs):
        axis = normalize_axis(axis, data.ndim)
        comb = self.condition(data, self.threshold.points)
Klaus Zimmermann's avatar
Klaus Zimmermann committed
308
        res = self.reducer(np.ma.masked_where(~comb, data), axis=axis)
309
310
311
312
        return res.astype('float32')

    def lazy_func(self, data, axis, **kwargs):
        axis = normalize_axis(axis, data.ndim)
Klaus Zimmermann's avatar
Klaus Zimmermann committed
313
314
        comb = self.lazy_condition(data, self.threshold.points)
        res = self.lazy_reducer(da.ma.masked_where(~comb, data), axis=axis)
315
316
317
        return res.astype('float32')


318
class TemperatureSum(ThresholdMixin, IndexFunction):
319
    def __init__(self, threshold, condition):
320
        super().__init__(threshold, condition, units=Unit('days'))
321
322
323
324
325
326
327
        if condition in ['>', '>=']:
            self.fun = lambda d, t: np.maximum(d - t, 0)
            self.lazy_fun = lambda d, t: da.maximum(d - t, 0)
        else:
            self.fun = lambda d, t: np.maximum(t - d, 0)
            self.lazy_fun = lambda d, t: da.maximum(t - d, 0)

328
329
    def prepare(self, input_cubes):
        super().prepare(input_cubes)
330
331
332
        ref_cube = next(iter(input_cubes.values()))
        self.standard_name = ref_cube.standard_name
        if ref_cube.units.is_convertible('degC'):
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
            self.units = 'degC days'
        else:
            raise RuntimeError("Invalid input units")

    def call_func(self, data, axis, **kwargs):
        axis = normalize_axis(axis, data.ndim)
        threshold = self.threshold.points[0]
        res = np.sum(self.fun(data, threshold), axis=axis)
        return res.astype('float32')

    def lazy_func(self, data, axis, **kwargs):
        axis = normalize_axis(axis, data.ndim)
        threshold = self.threshold.points[0]
        res = da.sum(self.lazy_fun(data, threshold), axis=axis)
        return res.astype('float32')