Commit 34eccac4 authored by Klaus Zimmermann's avatar Klaus Zimmermann
Browse files

Add index templating along with editor component for new table format (closes #70, closes #76)

parent d89b3430
# -*- coding: utf-8 -*-
import re
import pyexcel as pe
from climix.index_functions import SUPPORTED_OPERATORS, SUPPORTED_REDUCERS
def build_periods(spec):
    """Translate a '/'-separated period specification into period names.

    Every token of ``spec`` is looked up in a fixed abbreviation table
    ('ann', 'sea', 'mon'); the placeholders '_' and '' both translate to
    'unknown'.  Whitespace around a token is ignored, in the same way
    ``split_parts`` strips its parts, so ``'ann / mon'`` is read like
    ``'ann/mon'``.

    Returns a list with one translated name per token, in input order.
    Raises KeyError if a token is not in the table.
    """
    PERIODS = {
        'ann': 'annual',
        'sea': 'seasonal',
        'mon': 'monthly',
        '_': 'unknown',
        '': 'unknown',
    }
    return [PERIODS[period.strip()] for period in spec.split('/')]
# Maps the function identifier read from a record's 'index_function' field
# (see tr_index_function) to the name emitted for that index function.
INDEX_FUNCTIONS = {
'FUN_count_occurrences': 'count_occurrences',
'FUN_first_occurrence': 'first_occurrence',
'FUN_last_occurrence': 'last_occurrence',
'FUN_spell_length': 'spell_length',
'FUN_spell_beginning': 'spell_beginning',
'FUN_spell_end': 'spell_end',
'FUN_statistic': 'statistics',
'FUN_percentile': 'percentile',
'FUN_degreesum': 'degreesum',
'FUN_thresholded_statistic': 'thresholded_statistics',
'FUN_thresholded_percentile': 'thresholded_percentile',
'FUN_running_statistics': 'running_statistics',
'FUN_thresholded_running_statistics': 'thresholded_running_statistics',
}
def tr_cell_methods(cell_method_string):
    """Split a cell methods string into its individual cell methods.

    A single cell method is recognised as one or more ``name:`` tokens,
    followed by a method name and, optionally, either
    ``where <type1> [over <type2>]`` or ``within|over days|years``.
    Text that does not fit this shape is ignored.

    Returns the list of matched substrings in order of appearance; the list
    is empty when nothing matches.
    """
    name = r'(?P<name>\w+):'
    # Alternatives are tried left to right and the first one that lets the
    # whole pattern succeed wins.  Without the trailing \b, 'mean' would be
    # accepted for 'mean_absolute_value' (likewise 'sum' for
    # 'sum_of_squares', ...) and the rest of the method name would be lost.
    method = (r'(?P<method>('
              r'point|sum|maximum|maximum_absolute_value|median|'
              r'mid_range|minimum|minimum_absolute_value|mean|'
              r'mean_absolute_value|mean_of_upper_decile|mode|'
              r'range|root_mean_square|standard_deviation|'
              r'sum_of_squares|variance))\b')
    where = r'where'
    type1 = r'(?P<type1>\w+)'
    type2 = r'(?P<type2>\w+)'
    clim_indicator = r'(?P<indicator>(within|over))'
    clim_unit = r'(?P<unit>(days|years))'
    cell_method = re.compile(
        f'({name} )+{method}'
        f'(( {where} {type1}( over {type2})?)|'
        f'( {clim_indicator} {clim_unit}))?')
    return [m.group(0) for m in cell_method.finditer(cell_method_string)]
def split_parts(no_parts, part_string):
    """Split a comma-separated string into exactly ``no_parts`` parts.

    Each part is stripped of surrounding whitespace.  When ``no_parts`` is 0
    the string must be blank or the placeholder '_' and an empty list is
    returned; splitting always yields at least one part, so this case could
    otherwise never succeed.

    Raises ValueError if the number of parts differs from ``no_parts``.
    The check is an explicit raise rather than an ``assert`` so that it is
    still performed when Python runs with ``-O``.
    """
    if no_parts == 0:
        if part_string.strip() not in ('', '_'):
            raise ValueError(
                f'Expected no parts but found {part_string!r}')
        return []
    parts = [p.strip() for p in part_string.split(',')]
    if len(parts) != no_parts:
        raise ValueError(
            f'Expected {no_parts} comma-separated parts '
            f'but found {len(parts)} in {part_string!r}')
    return parts
def tr_inputs(no_inputs,
              variable_string, standard_name_string, cell_method_string):
    """Build one description dict per input variable.

    The three strings are each split into ``no_inputs`` parts with
    ``split_parts`` and combined position by position.  Every resulting
    dict has the keys 'var_name', 'standard_name' and 'cell_methods', the
    latter being the output of ``tr_cell_methods`` for that input.
    """
    var_names = split_parts(no_inputs, variable_string)
    std_names = split_parts(no_inputs, standard_name_string)
    methods = split_parts(no_inputs, cell_method_string)
    return [
        {'var_name': var_name,
         'standard_name': std_name,
         'cell_methods': tr_cell_methods(method)}
        for var_name, std_name, method in zip(var_names, std_names, methods)
    ]
def split_parameter_definitions(parameter_definitions_string, parameter_names):
    """Find all parameter definitions in a definitions string.

    A definition has the form ``<name>: <value>`` where ``<name>`` is one of
    ``parameter_names`` and ``<value>`` is a supported reducer, a supported
    operator, or a quantity written as
    ``(var_name: ..., standard_name: ..., value: ..., unit: ...)``.

    Returns an iterator of match objects with the named groups 'name',
    'reducer', 'operator', 'var_name', 'standard_name', 'value' and 'units';
    groups that belong to a form that did not match are None.
    """
    def alternation(words):
        # Alternatives are tried left to right, so a word that is a prefix
        # of another one (e.g. '>' and '>=') must come after the longer
        # word or the longer one can never match.  Escaping makes sure the
        # words are taken literally.
        longest_first = sorted(words, key=len, reverse=True)
        return '|'.join(re.escape(word) for word in longest_first)

    name_regex = r'(?P<name>{})'.format(alternation(parameter_names))
    op_regex = r'(?P<operator>{})'.format(alternation(SUPPORTED_OPERATORS))
    red_regex = r'(?P<reducer>{})'.format(alternation(SUPPORTED_REDUCERS))
    qty_regex = (r'\(var_name: (?P<var_name>[^,]*), '
                 r'standard_name: (?P<standard_name>[^,]*), '
                 r'value: (?P<value>[^,]*), '
                 r'unit: (?P<units>[^)]*)\)')
    param_regex = r'{}: (?:{}|{}|{})'.format(
        name_regex, red_regex, op_regex, qty_regex
    )
    matcher = re.compile(param_regex)
    return matcher.finditer(parameter_definitions_string)
def tr_parameter(parameter):
    """Translate one matched parameter definition into a parameter dict.

    ``parameter`` is indexed by group name.  The first of 'operator',
    'reducer' and 'value' that is not None decides the kind of the result:

    * operator: keys 'var_name', 'kind', 'operator'
    * reducer:  keys 'var_name', 'kind', 'reducer'
    * quantity: keys 'var_name', 'kind', 'standard_name', 'data', 'units'

    Raises RuntimeError if all three are None.
    """
    operator = parameter['operator']
    if operator is not None:
        return {'var_name': parameter['name'],
                'kind': 'operator',
                'operator': operator}

    reducer = parameter['reducer']
    if reducer is not None:
        return {'var_name': parameter['name'],
                'kind': 'reducer',
                'reducer': reducer}

    value = parameter['value']
    if value is None:
        raise RuntimeError(f"Invalid parameter found {parameter[0]}")
    return {'var_name': parameter['name'],
            'kind': 'quantity',
            'standard_name': parameter['standard_name'],
            'data': value,
            'units': parameter['units']}
def tr_index_function(name, no_thresholds,
                      parameter_names_string, parameter_definitions_string):
    """Build the index function description for one record.

    ``parameter_names_string`` is split into ``no_thresholds`` names, which
    are then used to locate the individual definitions inside
    ``parameter_definitions_string``.

    Returns a dict with the translated function name under 'name' (looked
    up in INDEX_FUNCTIONS) and the list of translated parameters under
    'parameters'.
    """
    names = split_parts(no_thresholds, parameter_names_string)
    matches = split_parameter_definitions(parameter_definitions_string,
                                          names)
    function_name = INDEX_FUNCTIONS[name]
    translated = [tr_parameter(match) for match in matches]
    return {'name': function_name, 'parameters': translated}
def prepare_record(record):
    """Translate one table record into an index definition dict.

    ``record`` is indexed by column name.  The result has the keys
    'var_name', 'reference', 'period', 'output', 'inputs',
    'index_function' and 'ET'.
    """
    var_name = record['VarName']
    no_parameters = int(record['N_parameters'])
    no_inputs = int(record['N_inputs'])

    reference = record['OUTPUT_reference']
    period = {
        'allowed': build_periods(record['allowed_freq']),
        # Only the first entry of the default specification is used.
        'default': build_periods(record['default_freq'])[0],
    }
    output = {
        'var_name': var_name,
        'standard_name': record['OUTPUT_standard_name'],
        'proposed_standard_name': record['OUTPUT_proposed_standard_name'],
        'long_name': record['OUTPUT_long_name'],
        'cell_methods': tr_cell_methods(record['OUTPUT_cell_methods']),
        'units': record['OUTPUT_user_units'],
    }
    inputs = tr_inputs(no_inputs,
                       record['INPUT_variable'],
                       record['INPUT_standard_name'],
                       record['INPUT_cell_methods'])
    index_function = tr_index_function(
        record['index_function'],
        no_parameters,
        record['parameter_name'], record['PARAMETER_definition'])
    et = {
        'short_name': record['ET_short_name'],
        'long_name': record['ET_long_name'],
        'definition': record['ET_definition'],
        'comment': record['ET_comment'],
    }

    # NOTE(review): 'proposed_standard_name' is already stored in `output`
    # above, so this assignment never changes the result.  Presumably the
    # key was meant to be present only for non-blank values -- confirm.
    proposed = record['OUTPUT_proposed_standard_name']
    if proposed.strip():
        output['proposed_standard_name'] = proposed

    return {
        'var_name': var_name,
        'reference': reference,
        'period': period,
        'output': output,
        'inputs': inputs,
        'index_function': index_function,
        'ET': et,
    }
def build_index_definitions(file_name):
    """Read index definitions from the 'ALT_index_attr' sheet of a file.

    The first row of the sheet provides the column names.  Only records
    whose 'ready' column equals 1 are translated with ``prepare_record``;
    all other records are left out.

    Returns the list of translated index definitions in sheet order.
    """
    sheet = pe.get_sheet(file_name=file_name,
                         sheet_name='ALT_index_attr')
    sheet.name_columns_by_row(0)
    return [prepare_record(row)
            for row in sheet.to_records()
            if row['ready'] == 1]
# -*- coding: utf-8 -*-
import argparse
import pprint
from jinja2 import Environment, PackageLoader
import pyexcel as pe
import climix
from .mastertable import build_index_definitions
from .mastertable import build_index_definitions as build_master
from .alttable import build_index_definitions as build_alt
def prepare_environment(args):
......@@ -21,7 +20,10 @@ def parse_args():
description=(f'An editor for a climate index thing, '
f'version {climix.__version__}.'))
parser.add_argument('-o', '--output')
parser.add_argument('master_table')
parser.add_argument('-t', '--table',
choices=['index_attr', 'ALT_index_attr'],
default='ALT_index_attr')
parser.add_argument('document')
return parser.parse_args()
......@@ -29,7 +31,13 @@ def main():
args = parse_args()
env = prepare_environment(args)
template = env.get_template('index_definitions.yml')
output = template.render(indices=build_index_definitions(args.master_table))
if args.table == 'index_attr':
index_definitions = build_master(args.document)
elif args.table == 'ALT_index_attr':
index_definitions = build_alt(args.document)
else:
raise RuntimeError(f'Unknown table {args.table}')
output = template.render(indices=index_definitions)
if args.output is None:
print(output)
else:
......
# -*- coding: utf-8 -*-
import re
import pyexcel as pe
......@@ -22,16 +24,41 @@ INDEX_FUNCTIONS = {
def split_parts(no_parts, part_string):
    """Split a comma-separated string into exactly ``no_parts`` parts.

    Each part is stripped of surrounding whitespace.  When ``no_parts`` is 0
    the string must be the placeholder '_' and an empty list is returned.

    Raises ValueError if the placeholder is missing for zero parts or if
    the number of parts differs from ``no_parts``.  The checks are explicit
    raises rather than ``assert`` statements so that they are still
    performed when Python runs with ``-O``.
    """
    if no_parts == 0:
        if part_string != '_':
            raise ValueError(
                f"Expected placeholder '_' for zero parts "
                f"but found {part_string!r}")
        return []
    parts = [p.strip() for p in part_string.split(',')]
    if len(parts) != no_parts:
        raise ValueError(
            f'Expected {no_parts} comma-separated parts '
            f'but found {len(parts)} in {part_string!r}')
    return parts
def tr_cell_methods(cell_method_string):
    """Split a cell methods string into its individual cell methods.

    A single cell method is recognised as one or more ``name:`` tokens,
    followed by a method name and, optionally, either
    ``where <type1> [over <type2>]`` or ``within|over days|years``.
    Text that does not fit this shape is ignored.

    Returns the list of matched substrings in order of appearance; the list
    is empty when nothing matches.
    """
    name = r'(?P<name>\w+):'
    # Alternatives are tried left to right and the first one that lets the
    # whole pattern succeed wins.  Without the trailing \b, 'mean' would be
    # accepted for 'mean_absolute_value' (likewise 'sum' for
    # 'sum_of_squares', ...) and the rest of the method name would be lost.
    method = (r'(?P<method>('
              r'point|sum|maximum|maximum_absolute_value|median|'
              r'mid_range|minimum|minimum_absolute_value|mean|'
              r'mean_absolute_value|mean_of_upper_decile|mode|'
              r'range|root_mean_square|standard_deviation|'
              r'sum_of_squares|variance))\b')
    where = r'where'
    type1 = r'(?P<type1>\w+)'
    type2 = r'(?P<type2>\w+)'
    clim_indicator = r'(?P<indicator>(within|over))'
    clim_unit = r'(?P<unit>(days|years))'
    cell_method = re.compile(
        f'({name} )+{method}'
        f'(( {where} {type1}( over {type2})?)|'
        f'( {clim_indicator} {clim_unit}))?')
    return [m.group(0) for m in cell_method.finditer(cell_method_string)]
def tr_inputs(no_inputs,
variable_string, standard_name_string, cell_method_string):
variables = split_parts(no_inputs, variable_string)
standard_names = split_parts(no_inputs, standard_name_string)
cell_methods = split_parts(no_inputs, cell_method_string)
cell_methods = [tr_cell_methods(cm)
for cm in split_parts(no_inputs, cell_method_string)]
d = []
for vn, sn, cm in zip(variables, standard_names, cell_methods):
d.append({'var_name': vn,
......@@ -45,8 +72,7 @@ def tr_relops(no_thresholds, relop_string):
d = [{'var_name': f'relop_{i}',
'kind': 'operator',
'operator': f'"{relop}"'}
for i, relop in enumerate(relops)
]
for i, relop in enumerate(relops)]
return d
......@@ -77,18 +103,18 @@ def tr_index_function(name, no_thresholds,
def prepare_record(record):
var_name = record['VarName']
no_thresholds = int(record['N_thresholds'])
no_parameters = int(record['N_parameters'])
no_inputs = int(record['N_inputs'])
d = {
'var_name': var_name,
'reference': record['OUTPUT_reference'],
'period': {'allowed': build_periods(record['freq']),
'default': build_periods(record['default_freq'])[0],},
'default': build_periods(record['default_freq'])[0]},
'output': {
'var_name': var_name,
'standard_name': record['OUTPUT_standard_name'],
'long_name': record['OUTPUT_long_name'],
'cell_methods': record['OUTPUT_cell_methods'],
'cell_methods': tr_cell_methods(record['OUTPUT_cell_methods']),
'units': record['OUTPUT_units'],
},
'inputs': tr_inputs(no_inputs,
......@@ -97,10 +123,10 @@ def prepare_record(record):
record['INPUT_cell_methods']),
'index_function': tr_index_function(
record['index_function'],
no_thresholds,
record['THRESHOLD_VarName'], record['THRESHOLD_standard_name'],
record['THRESHOLD_value'], record['THRESHOLD_units'],
record['THRESHOLD_relop']),
no_parameters,
record['PARAMETER_VarName'], record['PARAMETER_standard_name'],
record['PARAMETER_value'], record['PARAMETER_units'],
record['PARAMETER_relop']),
'ET': {
'short_name': record['ET_short_name'],
'long_name': record['ET_long_name'],
......@@ -121,8 +147,7 @@ def build_index_definitions(file_name):
records = sheet.to_records()
index_definitions = []
for record in records:
try:
index_definitions.append(prepare_record(record))
except:
pass
if record['ready'] != 1:
continue
index_definitions.append(prepare_record(record))
return index_definitions
indices:
{% for idx in indices %}
{{ idx.var_name }}:
reference: ETCCDI
reference: {{ idx.reference }}
period:
allowed:
{% for p in idx.period.allowed %}
......@@ -16,27 +16,35 @@ indices:
{% endif %}
long_name: {{ idx.output.long_name }}
units: {{ idx.output.units }}
# cell_methods: {{ idx.output.cell_methods }}
cell_methods:
{% for cm in idx.output.cell_methods %}
- {{ cm }}
{% endfor %}
input:
{% for input in idx.inputs %}
var_name: {{ input.var_name }}
standard_name: {{ input.standard_name }}
# cell_methods: {{ input.cell_methods }}
cell_methods:
{% for cm in input.cell_methods %}
- {{ cm }}
{% endfor %}
{% endfor %}
index_function:
name: {{ idx.index_function.name }}
parameters:
{% for param in idx.index_function.parameters %}
{% if param.kind == 'quantity' %}
{{ param.var_name }}:
kind: {{ param.kind }}
{% if param.kind == 'quantity' %}
standard_name: {{ param.standard_name }}
data: {{ param.data }}
units: {{ param.units }}
{% elif param.kind == 'operator' %}
{{ param.var_name }}:
kind: {{ param.kind }}
operator: {{ param.operator }}
operator: '{{ param.operator }}'
{% elif param.kind == 'reducer' %}
reducer: {{ param.reducer }}
{% else %}
# Warning: Unknown kind!
{% endif %}
{% endfor %}
ET:
......
......@@ -369,3 +369,43 @@ indices:
long_name:
definition:
comment:
tngt{TT}:
reference: CLIPC
period:
allowed:
annual:
monthly:
seasonal:
default: annual
output:
var_name: tngt{TT}
standard_name: number_of_days_with_air_temperature_above_threshold
proposed_standard_name: number_of_occurrences_with_air_temperature_above_threshold
long_name: Number of days with Tmin > {TT}C
units: days
cell_methods:
- time: minimum within days
- time: sum over days
input:
var_name: tasmin
standard_name: air_temperature
cell_methods:
- time: minimum
index_function:
name: count_occurrences
parameters:
threshold:
kind: quantity
long_name: Threshold value for daily minimum air temperature
standard_name: air_temperature
data: {TT}
units: degree_Celsius
condition:
kind: operator
operator: '>'
ET:
short_name:
long_name:
definition:
comment:
......@@ -2,7 +2,9 @@
# -*- coding: utf-8 -*-
import argparse
import copy
import os
import re
import threading
import time
......@@ -12,6 +14,7 @@ import netCDF4
import numpy as np
import pkg_resources
import sentry_sdk
import six
import yaml
import climix
......@@ -107,6 +110,51 @@ def build_index_function(spec):
return index_function
def build_template_index(index_definitions):
    """Collect the templated index names among the index definitions.

    A name is a template if it contains at least one ``{parameter}``
    placeholder.  The text around the placeholders forms the signature of
    the template.

    Returns a dict that maps each signature (a tuple of the literal name
    pieces) to a tuple of the template name and the list of its parameter
    names.  Names without placeholders are left out.
    """
    placeholder = re.compile(r'{([^}]+)}')
    templates = {}
    for name in index_definitions:
        # Splitting on a pattern with one group yields literal pieces at
        # even positions and the captured parameter names at odd positions.
        pieces = placeholder.split(name)
        if len(pieces) > 1:
            templates[tuple(pieces[::2])] = (name, pieces[1::2])
    return templates
def replace_parameters_in_dict(dictionary, parameter_dict):
    """Substitute template parameters in a nested dict, in place.

    Two kinds of values are replaced:

    * a nested dict with exactly one entry whose key is a parameter name
      and whose value is None is replaced by the parameter value itself;
    * a string is replaced by the result of ``str.format`` with the
      parameters as keyword arguments.

    All other nested dicts are processed recursively; any other value is
    left alone.  Nothing is returned.
    """
    for key, value in dictionary.items():
        if isinstance(value, dict):
            # Look at the single entry only if there is exactly one; an
            # empty nested dict has no first item to unpack.
            if len(value) == 1:
                (k, v), = value.items()
                if v is None and k in parameter_dict:
                    dictionary[key] = parameter_dict[k]
                    continue
            replace_parameters_in_dict(value, parameter_dict)
        # On Python 3 six.string_types is just (str,), so the builtin is
        # equivalent here.
        elif isinstance(value, str):
            dictionary[key] = value.format(**parameter_dict)
def get_index_definition(index_definitions, index):
    """Return the definition for ``index``, expanding a template if needed.

    If ``index`` is a key of ``index_definitions`` its definition is
    returned as is.  Otherwise the runs of digits in ``index`` are taken as
    parameter values and the remaining text is matched against the
    signatures from ``build_template_index``.  On a match, a deep copy of
    the template definition is returned in which the parameters have been
    replaced by the integer values.

    Raises KeyError if ``index`` is neither defined directly nor matched by
    a template.
    """
    try:
        return index_definitions[index]
    except KeyError:
        split = re.split(r'(\d+)', index)
        if len(split) == 1:
            # No digits in the name, so it cannot be a template instance.
            raise
    templates = build_template_index(index_definitions)
    signature = tuple(split[::2])
    try:
        template, parameter_names = templates[signature]
    except KeyError:
        # Report the index that was asked for, not the internal signature.
        raise KeyError(index) from None
    parameter_values = split[1::2]
    parameter_dict = {name: int(value)
                      for (name, value) in
                      zip(parameter_names, parameter_values)}
    index_definition = copy.deepcopy(index_definitions[template])
    replace_parameters_in_dict(index_definition, parameter_dict)
    return index_definition
def prepare_indices(index_definitions, requested_indices):
def select_period(metadata):
selected_period = metadata['default']
......@@ -114,7 +162,7 @@ def prepare_indices(index_definitions, requested_indices):
return PeriodSpecification(selected_period, period_metadata)
indices = []
for index in requested_indices:
definition = index_definitions[index]
definition = get_index_definition(index_definitions, index)
period_spec = select_period(definition['period'])
index_function = build_index_function(definition['index_function'])
index = Index(index_function, definition['output'], period_spec)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment