Commit b8cfb104 authored by Klaus Zimmermann
Browse files

Update editor to deal with new master table (closes #161)

parent bd9c5640
# -*- coding: utf-8 -*-
import logging
import re
import pyexcel as pe
from climix.index_functions.support import (SUPPORTED_OPERATORS,
SUPPORTED_REDUCERS)
def build_periods(spec):
    """Translate a '/'-separated frequency spec into long period names.

    Every token of ``spec`` is looked up in a fixed abbreviation table
    (``ann``, ``sea``, ``mon``).  The placeholder ``'_'`` and the empty
    string both translate to ``'unknown'``.

    Returns a list with one long name per token, in input order.
    Raises ``KeyError`` for any token that is not in the table.
    """
    long_names = dict(ann='annual', sea='seasonal', mon='monthly')
    # Placeholder and blank cells are both reported as 'unknown'.
    long_names['_'] = long_names[''] = 'unknown'
    return [long_names[abbreviation] for abbreviation in spec.split('/')]
def tr_cell_methods(cell_method_string):
    """Split a cell-methods string into its individual entries.

    An entry has the form ``<name>: [<name>: ...] <method>`` optionally
    followed by either ``where <type1> [over <type2>]`` or
    ``within|over days|years``.  Text that does not match this shape is
    silently ignored.

    Parameters:
        cell_method_string: the raw string, e.g.
            ``'time: maximum within days time: mean over years'``.

    Returns:
        A list with the matched text of every entry, in order of
        appearance; empty if nothing matches.
    """
    name = r'(?P<name>\w+):'
    # Alternation is ordered, so without the trailing \b a short method
    # such as 'maximum' would win over 'maximum_absolute_value' and the
    # entry would be truncated.  The word boundary forces the regex to
    # backtrack until the complete method name has been consumed.
    method = (r'(?P<method>('
              r'point|sum|maximum|maximum_absolute_value|median|'
              r'mid_range|minimum|minimum_absolute_value|mean|'
              r'mean_absolute_value|mean_of_upper_decile|mode|'
              r'range|root_mean_square|standard_deviation|'
              r'sum_of_squares|variance))\b')
    where = r'where'
    type1 = r'(?P<type1>\w+)'
    type2 = r'(?P<type2>\w+)'
    clim_indicator = r'(?P<indicator>(within|over))'
    clim_unit = r'(?P<unit>(days|years))'
    cell_method = re.compile(
        f'({name} )+{method}'
        f'(( {where} {type1}( over {type2})?)|'
        f'( {clim_indicator} {clim_unit}))?')
    return [m.group(0) for m in cell_method.finditer(cell_method_string)]
def split_parts(no_parts, part_string):
    """Split a comma-separated cell into exactly ``no_parts`` entries.

    Parameters:
        no_parts: the number of entries the cell is expected to hold.
        part_string: the raw cell content; ignored when ``no_parts`` is 0.

    Returns:
        A list of the entries with surrounding whitespace removed, or an
        empty list when ``no_parts`` is 0.

    Raises:
        ValueError: if the number of entries differs from ``no_parts``.
    """
    if no_parts == 0:
        return []
    parts = [p.strip() for p in part_string.split(',')]
    # An explicit raise (rather than ``assert``) keeps the check active
    # when Python runs with -O and tells the user which cell is wrong.
    if len(parts) != no_parts:
        raise ValueError(
            f"Expected {no_parts} comma-separated part(s) but found "
            f"{len(parts)} in {part_string!r}")
    return parts
def tr_inputs(no_inputs,
              variable_string, standard_name_string, cell_method_string):
    """Build the list of input descriptions for one index.

    The three strings are comma-separated cells that must each hold
    ``no_inputs`` entries (checked by ``split_parts``).  Entries at the
    same position are combined into one dict with the keys ``var_name``,
    ``standard_name`` and ``cell_methods``; the latter is the parsed list
    returned by ``tr_cell_methods``.
    """
    # All three cells are split before any cell method is parsed.
    names = split_parts(no_inputs, variable_string)
    std_names = split_parts(no_inputs, standard_name_string)
    methods = split_parts(no_inputs, cell_method_string)
    return [
        {'var_name': name,
         'standard_name': std_name,
         'cell_methods': tr_cell_methods(method)}
        for name, std_name, method in zip(names, std_names, methods)
    ]
def tr_parameter(parameter):
    """Turn one matched parameter definition into a parameter dict.

    ``parameter`` is indexed by group name (``name``, ``operator``,
    ``reducer``, ``value``, ``standard_name``, ``units``); index ``0`` is
    only read to build the error message.  The first group that is not
    ``None``, checked in the order operator, reducer, value, decides the
    ``kind`` of the result.  In every case ``var_name`` is taken from
    the ``name`` group.

    Raises ``RuntimeError`` if none of the three groups is set.
    """
    # Operators and reducers share one layout: the kind doubles as the
    # key under which the matched text is stored.
    for kind in ('operator', 'reducer'):
        if parameter[kind] is not None:
            return {'var_name': parameter['name'],
                    'kind': kind,
                    kind: parameter[kind]}
    if parameter['value'] is None:
        raise RuntimeError(f"Invalid parameter found {parameter[0]}")
    return {'var_name': parameter['name'],
            'kind': 'quantity',
            'standard_name': parameter['standard_name'],
            'data': parameter['value'],
            'units': parameter['units']}
def split_parameter_definitions(parameter_definitions_string, parameter_names):
    """Find and translate every parameter definition in a table cell.

    A definition reads ``<name>: <body>`` where ``<name>`` is one of
    ``parameter_names`` and ``<body>`` is, tried in this order, one of
    ``SUPPORTED_REDUCERS``, one of ``SUPPORTED_OPERATORS``, or a quantity
    of the form ``(var_name: ..., standard_name: ..., value: ...,
    unit: ...)``.  Names, reducers and operators are joined into the
    pattern as they are, without escaping.

    Returns a list with the ``tr_parameter`` result of every match, in
    order of appearance.
    """
    def named_choice(group, choices):
        # One named group that matches any of the given alternatives.
        return r'(?P<{}>{})'.format(group, '|'.join(choices))

    name = named_choice('name', parameter_names)
    operator = named_choice('operator', SUPPORTED_OPERATORS)
    reducer = named_choice('reducer', SUPPORTED_REDUCERS)
    quantity = (r'\(var_name: (?P<var_name>[^,]*), '
                r'standard_name: (?P<standard_name>[^,]*), '
                r'value: (?P<value>[^,]*), '
                r'unit: (?P<units>[^)]*)\)')
    pattern = re.compile(f'{name}: (?:{reducer}|{operator}|{quantity})')
    return [tr_parameter(match)
            for match in pattern.finditer(parameter_definitions_string)]
def tr_index_function(index_name, name, no_thresholds,
                      parameter_names_string, parameter_definitions_string):
    """Assemble the index-function description for one index.

    Parameters:
        index_name: name of the index; only used in the warning message.
        name: name of the index function, stored unchanged.
        no_thresholds: number of entries expected in
            ``parameter_names_string``.
        parameter_names_string: comma-separated parameter names.
        parameter_definitions_string: the cell holding the parameter
            definitions, parsed by ``split_parameter_definitions``.

    Returns:
        A dict with the keys ``name`` and ``parameters``.

    A warning is logged, and processing continues, when the set of
    parameter names found in the definitions differs from the listed
    names.
    """
    parameter_names = split_parts(no_thresholds, parameter_names_string)
    parameters = split_parameter_definitions(parameter_definitions_string,
                                             parameter_names)
    found_parameters = {p['var_name'] for p in parameters}
    if found_parameters != set(parameter_names):
        # logging.warn is a deprecated alias that newer Python versions
        # no longer provide; logging.warning is the supported spelling.
        logging.warning(
            "For index %s, the parameters listed in "
            "parameter_name (%s) are different from "
            "those defined in PARAMETER_definition "
            "(%s). Please check the table!",
            index_name, parameter_names, found_parameters)
    return {
        'name': name,
        'parameters': parameters,
    }
def prepare_record(record):
    """Convert one table row into a nested index-definition dict.

    ``record`` is indexed by column name.  The result has the keys
    ``var_name``, ``reference``, ``period``, ``output``, ``inputs``,
    ``index_function`` and ``ET``; the nested parts are produced by
    ``build_periods``, ``tr_cell_methods``, ``tr_inputs`` and
    ``tr_index_function``.  ``N_parameters`` and ``N_inputs`` are
    converted with ``int``.
    """
    var_name = record['VarName']
    parameter_count = int(record['N_parameters'])
    input_count = int(record['N_inputs'])

    reference = record['OUTPUT_reference']
    period = {
        'allowed': build_periods(record['allowed_freq']),
        # Only the first entry of the default frequency cell is used.
        'default': build_periods(record['default_freq'])[0],
    }
    output = {
        'var_name': var_name,
        'standard_name': record['OUTPUT_standard_name'],
        'proposed_standard_name': record['OUTPUT_proposed_standard_name'],
        'long_name': record['OUTPUT_long_name'],
        'cell_methods': tr_cell_methods(record['OUTPUT_cell_methods']),
        'units': record['OUTPUT_user_units'],
    }
    inputs = tr_inputs(input_count,
                       record['INPUT_variable'],
                       record['INPUT_standard_name'],
                       record['INPUT_cell_methods'])
    index_function = tr_index_function(var_name,
                                       record['index_function'],
                                       parameter_count,
                                       record['parameter_name'],
                                       record['PARAMETER_definition'])
    et = {
        'short_name': record['ET_short_name'],
        'long_name': record['ET_long_name'],
        'definition': record['ET_definition'],
        'comment': record['ET_comment'],
    }
    definition = {
        'var_name': var_name,
        'reference': reference,
        'period': period,
        'output': output,
        'inputs': inputs,
        'index_function': index_function,
        'ET': et,
    }

    # The proposed standard name is already part of ``output``; a
    # non-blank value is written once more under the same key.
    proposed = record['OUTPUT_proposed_standard_name']
    if proposed.strip():
        output['proposed_standard_name'] = proposed
    return definition
def build_index_definitions(file_name):
    """Read the 'ALT_index_attr' sheet and return the ready definitions.

    The first row of the sheet supplies the column names.  A row is
    included only when its ``ready`` cell converts to the integer 1;
    cells for which ``int`` raises ``ValueError`` count as not ready.
    Each included row is converted with ``prepare_record``.
    """
    def is_ready(row):
        try:
            return int(row['ready']) == 1
        except ValueError:
            return False

    sheet = pe.get_sheet(file_name=file_name, sheet_name='ALT_index_attr')
    sheet.name_columns_by_row(0)
    return [prepare_record(row)
            for row in sheet.to_records()
            if is_ready(row)]
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
import argparse import argparse
from contextlib import contextmanager
import logging
from jinja2 import Environment, PackageLoader from jinja2 import Environment, PackageLoader
import climix import climix
from .mastertable import build_index_definitions as build_master from .mastertable import build_index_definitions, build_variables
from .alttable import build_index_definitions as build_alt
def add_space(variable, quote=False):
    """Prefix a non-blank value with one space, optionally quoting it.

    ``variable`` is stripped of surrounding whitespace first.  If nothing
    remains, the empty string is returned so that no stray space is
    emitted.  Otherwise the result is a single space followed by the
    stripped value, wrapped in double quotes when ``quote`` is true.
    """
    value = variable.strip()
    if not value:
        return value
    template = ' "{}"' if quote else ' {}'
    return template.format(value)
def prepare_environment(args): def prepare_environment(args):
loader = PackageLoader('climix.editor') loader = PackageLoader('climix.editor')
env = Environment(loader=loader, trim_blocks=True) env = Environment(
loader=loader,
trim_blocks=True,
)
env.filters['add_space'] = add_space
return env return env
...@@ -19,27 +35,41 @@ def parse_args(): ...@@ -19,27 +35,41 @@ def parse_args():
parser = argparse.ArgumentParser( parser = argparse.ArgumentParser(
description=(f'An editor for a climate index thing, ' description=(f'An editor for a climate index thing, '
f'version {climix.__version__}.')) f'version {climix.__version__}.'))
parser.add_argument('-o', '--output') parser.add_argument('-f', '--force', action='store_true')
parser.add_argument('-t', '--table',
choices=['index_attr', 'ALT_index_attr'],
default='ALT_index_attr')
parser.add_argument('document') parser.add_argument('document')
return parser.parse_args() return parser.parse_args()
@contextmanager
def opened_w_force(filename, force):
    """Context manager yielding ``filename`` opened for writing.

    The file is first opened in exclusive-creation mode.  If it already
    exists, ``FileExistsError`` propagates unless ``force`` is true, in
    which case a warning is logged and the file is opened in ``'w'``
    mode, truncating it.  The file is closed when the ``with`` block
    exits, whether or not an exception occurred.
    """
    try:
        handle = open(filename, 'x')
    except FileExistsError:
        if not force:
            raise
        logging.warning('File {} already exists. '
                        'Overwriting due to --force'.format(filename))
        handle = open(filename, 'w')
    # The file object's own context manager closes it on exit.
    with handle:
        yield handle
def main(): def main():
args = parse_args() args = parse_args()
env = prepare_environment(args) env = prepare_environment(args)
template = env.get_template('index_definitions.yml') var_definition_template = env.get_template('variables.yml')
if args.table == 'index_attr': var_definitions = build_variables(args.document)
index_definitions = build_master(args.document) var_output = var_definition_template.render(variables=var_definitions)
elif args.table == 'ALT_index_attr':
index_definitions = build_alt(args.document) idx_definition_template = env.get_template('index_definitions.yml')
else: idx_definitions = build_index_definitions(args.document)
raise RuntimeError(f'Unknown table {args.table}') idx_output = idx_definition_template.render(indices=idx_definitions)
output = template.render(indices=index_definitions)
if args.output is None: with opened_w_force('variables.yml', args.force) as outfile:
print(output) outfile.write(var_output)
else:
with open(args.output, 'w') as outfile: with opened_w_force('index_definitions.yml', args.force) as outfile:
outfile.write(output) outfile.write(idx_output)
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
import re import logging
import regex as re
import pyexcel as pe import pyexcel as pe
from climix.index_functions.support import (SUPPORTED_OPERATORS,
SUPPORTED_REDUCERS)
def build_periods(spec): def build_periods(spec):
PERIODS = { PERIODS = {
...@@ -17,21 +21,6 @@ def build_periods(spec): ...@@ -17,21 +21,6 @@ def build_periods(spec):
return periods return periods
INDEX_FUNCTIONS = {
'FUN_count': 'count_occurrences',
'FUN_spell': 'spell_length',
}
def split_parts(no_parts, part_string):
    """Split a comma-separated cell into exactly ``no_parts`` entries.

    Parameters:
        no_parts: the number of entries the cell is expected to hold.
        part_string: the raw cell content.  When ``no_parts`` is 0 the
            cell must contain the placeholder ``'_'``.

    Returns:
        A list of the entries with surrounding whitespace removed, or an
        empty list when ``no_parts`` is 0.

    Raises:
        ValueError: if the placeholder is missing for an empty cell, or
            if the number of entries differs from ``no_parts``.
    """
    # Explicit raises (rather than ``assert``) keep both checks active
    # when Python runs with -O and name the offending cell content.
    if no_parts == 0:
        if part_string != '_':
            raise ValueError(
                f"Expected placeholder '_' for an empty cell but found "
                f"{part_string!r}")
        return []
    parts = [p.strip() for p in part_string.split(',')]
    if len(parts) != no_parts:
        raise ValueError(
            f"Expected {no_parts} comma-separated part(s) but found "
            f"{len(parts)} in {part_string!r}")
    return parts
def tr_cell_methods(cell_method_string): def tr_cell_methods(cell_method_string):
name = r'(?P<name>\w+):' name = r'(?P<name>\w+):'
method = (r'(?P<method>(' method = (r'(?P<method>('
...@@ -53,49 +42,77 @@ def tr_cell_methods(cell_method_string): ...@@ -53,49 +42,77 @@ def tr_cell_methods(cell_method_string):
return cms return cms
def tr_inputs(no_inputs, def split_parts(no_parts, part_string):
variable_string, standard_name_string, cell_method_string): if no_parts == 0:
variables = split_parts(no_inputs, variable_string) return []
standard_names = split_parts(no_inputs, standard_name_string) parts = [p.strip() for p in part_string.split(',')]
cell_methods = [tr_cell_methods(cm) assert len(parts) == no_parts
for cm in split_parts(no_inputs, cell_method_string)] return parts
d = []
for vn, sn, cm in zip(variables, standard_names, cell_methods):
d.append({'var_name': vn,
'standard_name': sn,
'cell_methods': cm})
return d
def tr_relops(no_thresholds, relop_string): def tr_inputs(input):
relops = split_parts(no_thresholds, relop_string) inputs = {}
d = [{'var_name': f'relop_{i}', for input_variable in input.split(','):
'kind': 'operator', key, variable = input_variable.split(':')
'operator': f'"{relop}"'} inputs[key.strip()] = variable.strip()
for i, relop in enumerate(relops)] return inputs
def tr_parameter(parameter):
if parameter['operator'] is not None:
d = {'var_name': parameter['name'],
'kind': 'operator',
'operator': parameter['operator']}
elif parameter['reducer'] is not None:
d = {'var_name': parameter['name'],
'kind': 'reducer',
'reducer': parameter['reducer']}
elif parameter['value'] is not None:
d = {'var_name': parameter['name'],
'kind': 'quantity',
'standard_name': parameter['standard_name'],
'proposed_standard_name': parameter['proposed_standard_name'],
'data': parameter['value'],
'units': parameter['units'],
'long_name': parameter['long_name']}
else:
raise RuntimeError(f"Invalid parameter found {parameter[0]}")
return d return d
def tr_index_function(name, no_thresholds, def split_parameter_definitions(parameter_definitions_string, parameter_names):
var_name_string, standard_name_string, name_regex = r'(?P<name>{})'.format('|'.join(parameter_names))
value_string, units_string, relop_string): op_regex = r'(?P<operator>{})'.format('|'.join(SUPPORTED_OPERATORS))
var_names = split_parts(no_thresholds, var_name_string) red_regex = r'(?P<reducer>{})'.format('|'.join(SUPPORTED_REDUCERS))
standard_names = split_parts(no_thresholds, standard_name_string) qty_regex = (
values = split_parts(no_thresholds, value_string) r'\(var_name: (?P<var_name>[^,]*), '
units = split_parts(no_thresholds, units_string) r'standard_name: (?P<standard_name>[^,]*), '
parameters = [] r'(proposed_standard_name: (?P<proposed_standard_name>[^,]*), )?'
for vn, sn, v, u in zip(var_names, standard_names, values, units): r'value: (?P<value>[^,]*), '
parameters.append({ r'unit: (?P<units>[^,)]*)(, |\))'
'var_name': vn, r'(long_name: \p{Pi}(?P<long_name>[^\p{Pf}]*)\p{Pf}\))?')
'kind': 'quantity', param_regex = r'{}: (?:{}|{}|{})'.format(
'standard_name': sn, name_regex, red_regex, op_regex, qty_regex
'data': v, )
'units': u, matcher = re.compile(param_regex)
}) result = [tr_parameter(p)
relops = tr_relops(no_thresholds, relop_string) for p in matcher.finditer(parameter_definitions_string)]
parameters.extend(relops) return result
def tr_index_function(index_name, name, no_thresholds,
parameter_names_string, parameter_definitions_string):
parameter_names = split_parts(no_thresholds, parameter_names_string)
parameters = split_parameter_definitions(parameter_definitions_string,
parameter_names)
found_parameters = set(p['var_name'] for p in parameters)
if found_parameters != set(parameter_names):
logging.warn(f"For index {index_name}, the parameters listed in "
f"parameter_name ({parameter_names}) are different from "
f"those defined in PARAMETER_definition "
f"({found_parameters}). Please check the table!")
index_function = { index_function = {
'name': INDEX_FUNCTIONS.get(name, name), 'name': name,
'parameters': parameters, 'parameters': parameters,
} }
return index_function return index_function
...@@ -104,29 +121,25 @@ def tr_index_function(name, no_thresholds, ...@@ -104,29 +121,25 @@ def tr_index_function(name, no_thresholds,
def prepare_record(record): def prepare_record(record):
var_name = record['VarName'] var_name = record['VarName']
no_parameters = int(record['N_parameters']) no_parameters = int(record['N_parameters'])
no_inputs = int(record['N_inputs'])
d = { d = {
'var_name': var_name, 'var_name': var_name,
'reference': record['OUTPUT_reference'], 'reference': record['OUTPUT_reference'],
'period': {'allowed': build_periods(record['freq']), 'period': {'allowed': build_periods(record['allowed_freq']),
'default': build_periods(record['default_freq'])[0]}, 'default': build_periods(record['default_freq'])[0]},
'output': { 'output': {
'var_name': var_name, 'var_name': var_name,
'standard_name': record['OUTPUT_standard_name'], 'standard_name': record['OUTPUT_standard_name'],
'proposed_standard_name': record['OUTPUT_proposed_standard_name'],
'long_name': record['OUTPUT_long_name'], 'long_name': record['OUTPUT_long_name'],
'cell_methods': tr_cell_methods(record['OUTPUT_cell_methods']), 'cell_methods': tr_cell_methods(record['OUTPUT_cell_methods']),
'units': record['OUTPUT_units'], 'units': record['OUTPUT_user_units'],
}, },
'inputs': tr_inputs(no_inputs, 'inputs': tr_inputs(record['input']),
record['INPUT_variable'],
record['INPUT_standard_name'],
record['INPUT_cell_methods']),
'index_function': tr_index_function( 'index_function': tr_index_function(
var_name,
record['index_function'], record['index_function'],
no_parameters, no_parameters,
record['PARAMETER_VarName'], record['PARAMETER_standard_name'], record['parameter_name'], record['PARAMETER_definition']),
record['PARAMETER_value'], record['PARAMETER_units'],
record['PARAMETER_relop']),
'ET': { 'ET': {
'short_name': record['ET_short_name'], 'short_name': record['ET_short_name'],
'long_name': record['ET_long_name'], 'long_name': record['ET_long_name'],
...@@ -142,12 +155,41 @@ def prepare_record(record): ...@@ -142,12 +155,41 @@ def prepare_record(record):
def build_index_definitions(file_name): def build_index_definitions(file_name):
sheet = pe.get_sheet(file_name=file_name, sheet = pe.get_sheet(file_name=file_name,
sheet_name='index attr') sheet_name='index_definitions')
sheet.name_columns_by_row(0) sheet.name_columns_by_row(0)
records = sheet.to_records() records = sheet.to_records()
index_definitions = [] index_definitions = []
for record in records: for record in records:
if record['ready'] != 1: try:
ready = int(record['ready'])
except ValueError:
ready = -1
if ready != 1:
continue continue
index_definitions.append(prepare_record(record)) index_definitions.append(prepare_record(record))
return index_definitions return index_definitions
def prepare_variable_record(record):
    """Convert one row of the variables sheet into a variable dict.

    ``record`` is indexed by column name.  ``var_name``,
    ``standard_name`` and ``comment`` are stripped of surrounding
    whitespace, ``cell_methods`` is parsed with ``tr_cell_methods`` and
    ``aliases`` is split at commas with every entry stripped.

    NOTE(review): a blank ``aliases`` cell yields ``['']``, not an empty
    list -- confirm that the consumer expects this.
    """
    def stripped(column):
        return record[column].strip()

    return {
        'var_name': stripped('var_name'),
        'standard_name': stripped('standard_name'),
        'cell_methods': tr_cell_methods(record['cell_methods']),
        'aliases': list(map(str.strip, record['aliases'].split(','))),
        'comment': stripped('comment'),
    }
def build_variables(file_name):
    """Read the 'variables' sheet and return one dict per named variable.

    The first row of the sheet supplies the column names.  Rows whose
    ``var_name`` cell is blank after stripping are skipped; every other
    row is converted with ``prepare_variable_record``.
    """
    sheet = pe.get_sheet(file_name=file_name, sheet_name='variables')
    sheet.name_columns_by_row(0)
    return [prepare_variable_record(row)
            for row in sheet.to_records()
            if row['var_name'].strip() != '']
indices: indices:
{% for idx in indices %} {% for idx in indices %}
{{ idx.var_name }}: {{ idx.var_name }}:
reference: {{ idx.reference }} reference: {{- idx.reference|add_space }}
period: period:
allowed: allowed:
{% for p in idx.period.allowed %} {% for p in idx.period.allowed %}
{{ p }}: {{ p }}:
{% endfor %} {% endfor %}
default: {{ idx.period.default }} default: {{- idx.period.default|add_space }}
output: output:
var_name: {{ idx.output.var_name }} var_name: {{- idx.output.var_name|add_space(quote=True) }}
standard_name: {{ idx.output.standard_name }} standard_name: {{- idx.output.standard_name|add_space }}
{% if idx.output.proposed_standard_name %} {% if idx.output.proposed_standard_name %}
proposed_standard_name: {{ idx.output.proposed_standard_name }} proposed_standard_name: {{- idx.output.proposed_standard_name|add_space }}
{% endif %} {% endif %}
long_name: {{ idx.output.long_name }} long_name: {{- idx.output.long_name|add_space(quote=True) }}
units: {{ idx.output.units }} units: {{- idx.output.units|add_space(quote=True) }}
cell_methods: cell_methods:
{% for cm in idx.output.cell_methods %} {% for cm in idx.output.cell_methods %}
- {{ cm }} - {{ cm }}
{% endfor %} {% endfor %}
input: input:
{% for input in idx.inputs %} {% for argname, variable in idx.inputs.items() %}
var_name: {{ input.var_name }} {{ argname }}: {{- variable|add_space }}
standard_name: {{ input.standard_name }}
cell_methods:
{% for cm in input.cell_methods %}
- {{ cm }}
{% endfor %}
{% endfor %} {% endfor %}
index_function: index_function:
name: {{ idx.index_function.name }} name: {{- idx.index_function.name|add_space }}