Commit b8cfb104 authored by Klaus Zimmermann's avatar Klaus Zimmermann
Browse files

Update editor to deal with new master table (closes #161)

parent bd9c5640
# -*- coding: utf-8 -*-
import logging
import re
import pyexcel as pe
from climix.index_functions.support import (SUPPORTED_OPERATORS,
SUPPORTED_REDUCERS)
def build_periods(spec):
    """Translate a slash-separated frequency spec into period names.

    Every abbreviation in ``spec`` is looked up in a fixed table
    (``ann``, ``sea``, ``mon``; ``_`` and the empty string both stand for
    ``unknown``). An abbreviation missing from the table raises ``KeyError``.
    """
    long_names = {
        'ann': 'annual',
        'sea': 'seasonal',
        'mon': 'monthly',
        '_': 'unknown',
        '': 'unknown',
    }
    names = []
    for abbreviation in spec.split('/'):
        names.append(long_names[abbreviation])
    return names
def tr_cell_methods(cell_method_string):
    """Split a cell_methods string into its individual cell methods.

    A cell method is one or more ``name:`` prefixes followed by a method
    keyword and an optional ``where <type> [over <type>]`` or
    ``within|over days|years`` qualifier.

    Returns the list of matched substrings in order of appearance; text that
    does not match the pattern is skipped.
    """
    name = r'(?P<name>\w+):'
    # The trailing (?!\w) forces the whole method word to be consumed.
    # Without it the alternation stops at the first alternative that fits,
    # so e.g. ``maximum_absolute_value`` was cut down to ``maximum``.
    method = (r'(?P<method>('
              r'point|sum|maximum|maximum_absolute_value|median|'
              r'mid_range|minimum|minimum_absolute_value|mean|'
              r'mean_absolute_value|mean_of_upper_decile|mode|'
              r'range|root_mean_square|standard_deviation|'
              r'sum_of_squares|variance))(?!\w)')
    where = r'where'
    type1 = r'(?P<type1>\w+)'
    type2 = r'(?P<type2>\w+)'
    clim_indicator = r'(?P<indicator>(within|over))'
    clim_unit = r'(?P<unit>(days|years))'
    cell_method = re.compile(
        f'({name} )+{method}'
        f'(( {where} {type1}( over {type2})?)|'
        f'( {clim_indicator} {clim_unit}))?')
    return [m.group(0) for m in cell_method.finditer(cell_method_string)]
def split_parts(no_parts, part_string):
    """Split a comma-separated string into exactly ``no_parts`` parts.

    Returns an empty list when ``no_parts`` is 0, without looking at
    ``part_string``. Otherwise returns the parts with surrounding whitespace
    removed.

    Raises ``ValueError`` if the number of parts differs from ``no_parts``.
    """
    if no_parts == 0:
        return []
    parts = [p.strip() for p in part_string.split(',')]
    # An explicit raise (rather than assert) keeps this input check active
    # when Python runs with -O.
    if len(parts) != no_parts:
        raise ValueError(
            f"Expected {no_parts} comma-separated part(s) "
            f"but found {len(parts)} in {part_string!r}")
    return parts
def tr_inputs(no_inputs,
              variable_string, standard_name_string, cell_method_string):
    """Build one description dict per input variable.

    The three strings are comma-separated lists that must each contain
    ``no_inputs`` entries (checked by ``split_parts``). The entries are paired
    up positionally and the cell-method entry is parsed with
    ``tr_cell_methods``.
    """
    columns = zip(split_parts(no_inputs, variable_string),
                  split_parts(no_inputs, standard_name_string),
                  split_parts(no_inputs, cell_method_string))
    return [{'var_name': variable,
             'standard_name': standard_name,
             'cell_methods': tr_cell_methods(cell_method)}
            for variable, standard_name, cell_method in columns]
def tr_parameter(parameter):
    """Turn a regex match of one parameter definition into a dict.

    ``parameter`` is indexed by group name. The first of the groups
    ``operator``, ``reducer`` and ``value`` that is not ``None`` decides the
    kind of the parameter (``operator``, ``reducer`` or ``quantity``).

    Raises ``RuntimeError`` if none of the three groups took part in the
    match.
    """
    operator = parameter['operator']
    if operator is not None:
        return {'var_name': parameter['name'],
                'kind': 'operator',
                'operator': operator}

    reducer = parameter['reducer']
    if reducer is not None:
        return {'var_name': parameter['name'],
                'kind': 'reducer',
                'reducer': reducer}

    value = parameter['value']
    if value is None:
        raise RuntimeError(f"Invalid parameter found {parameter[0]}")
    return {'var_name': parameter['name'],
            'kind': 'quantity',
            'standard_name': parameter['standard_name'],
            'data': value,
            'units': parameter['units']}
def split_parameter_definitions(parameter_definitions_string, parameter_names):
    """Extract all parameter definitions from a definitions string.

    A definition has the form ``<name>: <reducer>``, ``<name>: <operator>`` or
    ``<name>: (var_name: ..., standard_name: ..., value: ..., unit: ...)``,
    where ``<name>`` is one of ``parameter_names`` and the reducers and
    operators come from ``SUPPORTED_REDUCERS`` and ``SUPPORTED_OPERATORS``.

    Returns a list with one dict per definition found, as produced by
    ``tr_parameter``.
    """
    def alternation(literals):
        # The entries are literal text, so escape them. Longer entries go
        # first so that an entry which is a prefix of another one cannot cut
        # the longer one short (nothing anchors the end of an operator or
        # reducer in the pattern).
        ordered = sorted(literals, key=len, reverse=True)
        return '|'.join(re.escape(literal) for literal in ordered)

    name_regex = r'(?P<name>{})'.format(alternation(parameter_names))
    op_regex = r'(?P<operator>{})'.format(alternation(SUPPORTED_OPERATORS))
    red_regex = r'(?P<reducer>{})'.format(alternation(SUPPORTED_REDUCERS))
    qty_regex = (r'\(var_name: (?P<var_name>[^,]*), '
                 r'standard_name: (?P<standard_name>[^,]*), '
                 r'value: (?P<value>[^,]*), '
                 r'unit: (?P<units>[^)]*)\)')
    param_regex = r'{}: (?:{}|{}|{})'.format(
        name_regex, red_regex, op_regex, qty_regex
    )
    matcher = re.compile(param_regex)
    return [tr_parameter(p)
            for p in matcher.finditer(parameter_definitions_string)]
def tr_index_function(index_name, name, no_thresholds,
                      parameter_names_string, parameter_definitions_string):
    """Build the index-function description for one index.

    ``parameter_names_string`` is split into ``no_thresholds`` names, which
    are then used to find the definitions in
    ``parameter_definitions_string``. If the names found in the definitions
    differ from the listed names, a warning mentioning ``index_name`` is
    logged; the result is returned regardless.

    Returns a dict with the keys ``name`` and ``parameters``.
    """
    parameter_names = split_parts(no_thresholds, parameter_names_string)
    parameters = split_parameter_definitions(parameter_definitions_string,
                                             parameter_names)
    found_parameters = set(p['var_name'] for p in parameters)
    if found_parameters != set(parameter_names):
        # logging.warn is a deprecated alias; logging.warning is the
        # supported spelling.
        logging.warning(
            f"For index {index_name}, the parameters listed in "
            f"parameter_name ({parameter_names}) are different from "
            f"those defined in PARAMETER_definition "
            f"({found_parameters}). Please check the table!")
    index_function = {
        'name': name,
        'parameters': parameters,
    }
    return index_function
def prepare_record(record):
    """Convert one table row into an index-definition dict.

    ``record`` is accessed by column name. The result has the keys
    ``var_name``, ``reference``, ``period``, ``output``, ``inputs``,
    ``index_function`` and ``ET``.
    """
    var_name = record['VarName']
    no_parameters = int(record['N_parameters'])
    no_inputs = int(record['N_inputs'])

    definition = {
        'var_name': var_name,
        'reference': record['OUTPUT_reference'],
    }
    definition['period'] = {
        'allowed': build_periods(record['allowed_freq']),
        # Only the first entry of the default frequency spec is used.
        'default': build_periods(record['default_freq'])[0],
    }
    definition['output'] = {
        'var_name': var_name,
        'standard_name': record['OUTPUT_standard_name'],
        'proposed_standard_name': record['OUTPUT_proposed_standard_name'],
        'long_name': record['OUTPUT_long_name'],
        'cell_methods': tr_cell_methods(record['OUTPUT_cell_methods']),
        'units': record['OUTPUT_user_units'],
    }
    definition['inputs'] = tr_inputs(
        no_inputs,
        record['INPUT_variable'],
        record['INPUT_standard_name'],
        record['INPUT_cell_methods'])
    definition['index_function'] = tr_index_function(
        var_name,
        record['index_function'],
        no_parameters,
        record['parameter_name'], record['PARAMETER_definition'])
    definition['ET'] = {
        'short_name': record['ET_short_name'],
        'long_name': record['ET_long_name'],
        'definition': record['ET_definition'],
        'comment': record['ET_comment'],
    }

    proposed = record['OUTPUT_proposed_standard_name']
    if proposed.strip() != '':
        definition['output']['proposed_standard_name'] = proposed
    return definition
def build_index_definitions(file_name):
    """Read the ``ALT_index_attr`` sheet and build the index definitions.

    The first row of the sheet provides the column names. Only rows whose
    ``ready`` column converts to the integer 1 are passed on to
    ``prepare_record``; rows where the conversion raises ``ValueError`` are
    skipped.
    """
    def is_ready(record):
        try:
            return int(record['ready']) == 1
        except ValueError:
            return False

    sheet = pe.get_sheet(file_name=file_name,
                         sheet_name='ALT_index_attr')
    sheet.name_columns_by_row(0)
    return [prepare_record(record)
            for record in sheet.to_records()
            if is_ready(record)]
# -*- coding: utf-8 -*-
import argparse
from contextlib import contextmanager
import logging
from jinja2 import Environment, PackageLoader
import climix
from .mastertable import build_index_definitions as build_master
from .alttable import build_index_definitions as build_alt
from .mastertable import build_index_definitions, build_variables
def add_space(variable, quote=False):
    """Template filter that prefixes a non-empty value with one space.

    ``variable`` is stripped first. An empty result is returned as is, so
    nothing follows the key in the rendered output. Otherwise the value is
    returned with a leading space, wrapped in double quotes when ``quote`` is
    true.
    """
    value = variable.strip()
    if not value:
        return value
    pattern = ' "{}"' if quote else ' {}'
    return pattern.format(value)
def prepare_environment(args):
    """Create the Jinja2 environment used to render the templates.

    Templates are loaded from the ``climix.editor`` package, block trimming is
    enabled, and the ``add_space`` filter is registered. ``args`` is accepted
    but not used here.
    """
    # The environment used to be constructed twice in a row with the same
    # settings; a single construction is sufficient.
    env = Environment(
        loader=PackageLoader('climix.editor'),
        trim_blocks=True,
    )
    env.filters['add_space'] = add_space
    return env
def parse_args():
    """Parse the command line of the editor.

    Options: ``-o/--output``, ``-t/--table`` (``index_attr`` or
    ``ALT_index_attr``, the latter being the default) and ``-f/--force``,
    followed by the positional ``document`` argument.
    """
    parser = argparse.ArgumentParser(
        description=(f'An editor for a climate index thing, '
                     f'version {climix.__version__}.'))
    parser.add_argument('-o', '--output')
    parser.add_argument('-t', '--table',
                        choices=['index_attr', 'ALT_index_attr'],
                        default='ALT_index_attr')
    parser.add_argument('-f', '--force', action='store_true')
    parser.add_argument('document')
    return parser.parse_args()
@contextmanager
def opened_w_force(filename, force):
    """Context manager that opens ``filename`` for writing text.

    The file is created exclusively first. If it already exists, it is
    overwritten (with a logged warning) when ``force`` is true; otherwise the
    ``FileExistsError`` propagates. The file is closed when the context ends.
    """
    try:
        handle = open(filename, 'x')
    except FileExistsError:
        if not force:
            raise
        logging.warning('File {} already exists. '
                        'Overwriting due to --force'.format(filename))
        handle = open(filename, 'w')
    with handle:
        yield handle
def main():
    """Entry point: render the definitions from the given document.

    First the index definitions are built from the table selected with
    ``--table`` and either printed or written to ``--output``. Then the
    variable and index definitions are rendered and written to
    ``variables.yml`` and ``index_definitions.yml``, honouring ``--force``.
    """
    args = parse_args()
    env = prepare_environment(args)

    template = env.get_template('index_definitions.yml')
    builders = {
        'index_attr': build_master,
        'ALT_index_attr': build_alt,
    }
    if args.table not in builders:
        raise RuntimeError(f'Unknown table {args.table}')
    index_definitions = builders[args.table](args.document)
    output = template.render(indices=index_definitions)
    if args.output is not None:
        with open(args.output, 'w') as outfile:
            outfile.write(output)
    else:
        print(output)

    var_output = env.get_template('variables.yml').render(
        variables=build_variables(args.document))
    idx_output = env.get_template('index_definitions.yml').render(
        indices=build_index_definitions(args.document))

    with opened_w_force('variables.yml', args.force) as outfile:
        outfile.write(var_output)
    with opened_w_force('index_definitions.yml', args.force) as outfile:
        outfile.write(idx_output)
# -*- coding: utf-8 -*-
import re
import logging
import regex as re
import pyexcel as pe
from climix.index_functions.support import (SUPPORTED_OPERATORS,
SUPPORTED_REDUCERS)
def build_periods(spec):
PERIODS = {
......@@ -17,21 +21,6 @@ def build_periods(spec):
return periods
# Maps the ``FUN_*`` identifiers to index function names. It is consulted in
# tr_index_function via ``.get(name, name)``, i.e. identifiers without an
# entry fall back to themselves.
INDEX_FUNCTIONS = {
'FUN_count': 'count_occurrences',
'FUN_spell': 'spell_length',
}
def split_parts(no_parts, part_string):
    """Split a comma-separated string into exactly ``no_parts`` parts.

    When ``no_parts`` is 0 the string must be the placeholder ``'_'`` and an
    empty list is returned. Otherwise returns the parts with surrounding
    whitespace removed.

    Raises ``ValueError`` if the placeholder is missing or the number of
    parts differs from ``no_parts``.
    """
    # Explicit raises (rather than assert) keep these input checks active
    # when Python runs with -O.
    if no_parts == 0:
        if part_string != '_':
            raise ValueError(
                f"Expected the placeholder '_' for zero parts "
                f"but found {part_string!r}")
        return []
    parts = [p.strip() for p in part_string.split(',')]
    if len(parts) != no_parts:
        raise ValueError(
            f"Expected {no_parts} comma-separated part(s) "
            f"but found {len(parts)} in {part_string!r}")
    return parts
def tr_cell_methods(cell_method_string):
name = r'(?P<name>\w+):'
method = (r'(?P<method>('
......@@ -53,49 +42,77 @@ def tr_cell_methods(cell_method_string):
return cms
def tr_inputs(no_inputs,
              variable_string, standard_name_string, cell_method_string):
    """Build one description dict per input variable.

    The three strings are comma-separated lists that must each contain
    ``no_inputs`` entries (checked by ``split_parts``). All cell-method
    entries are parsed with ``tr_cell_methods`` before the entries are paired
    up positionally.
    """
    variables = split_parts(no_inputs, variable_string)
    standard_names = split_parts(no_inputs, standard_name_string)
    parsed_cell_methods = list(
        map(tr_cell_methods, split_parts(no_inputs, cell_method_string)))
    return [{'var_name': variable,
             'standard_name': standard_name,
             'cell_methods': cell_methods}
            for variable, standard_name, cell_methods
            in zip(variables, standard_names, parsed_cell_methods)]
def split_parts(no_parts, part_string):
    """Split a comma-separated string into exactly ``no_parts`` parts.

    Returns an empty list when ``no_parts`` is 0, without looking at
    ``part_string``. Otherwise returns the parts with surrounding whitespace
    removed.

    Raises ``ValueError`` if the number of parts differs from ``no_parts``.
    """
    if no_parts == 0:
        return []
    parts = [p.strip() for p in part_string.split(',')]
    # An explicit raise (rather than assert) keeps this input check active
    # when Python runs with -O.
    if len(parts) != no_parts:
        raise ValueError(
            f"Expected {no_parts} comma-separated part(s) "
            f"but found {len(parts)} in {part_string!r}")
    return parts
def tr_relops(no_thresholds, relop_string):
relops = split_parts(no_thresholds, relop_string)
d = [{'var_name': f'relop_{i}',
def tr_inputs(input):
    """Parse a ``key: variable, key: variable`` string into a dict.

    Entries are separated by commas and each entry must contain exactly one
    colon; keys and variables are stripped of surrounding whitespace. An
    entry with no colon or more than one raises ``ValueError``.
    """
    pairs = (entry.split(':') for entry in input.split(','))
    return {argname.strip(): variable.strip() for argname, variable in pairs}
def tr_parameter(parameter):
    """Turn a regex match of one parameter definition into a dict.

    ``parameter`` is indexed by group name. The first of the groups
    ``operator``, ``reducer`` and ``value`` that is not ``None`` decides the
    kind of the parameter (``operator``, ``reducer`` or ``quantity``). For a
    quantity, ``proposed_standard_name`` and ``long_name`` are copied as they
    are, so they are ``None`` when those optional parts did not match.

    Raises ``RuntimeError`` if none of the three groups took part in the
    match.
    """
    # Two leftover lines of the removed tr_relops helper used to sit inside
    # the operator dict and made the literal invalid; they are dropped here.
    if parameter['operator'] is not None:
        d = {'var_name': parameter['name'],
             'kind': 'operator',
             'operator': parameter['operator']}
    elif parameter['reducer'] is not None:
        d = {'var_name': parameter['name'],
             'kind': 'reducer',
             'reducer': parameter['reducer']}
    elif parameter['value'] is not None:
        d = {'var_name': parameter['name'],
             'kind': 'quantity',
             'standard_name': parameter['standard_name'],
             'proposed_standard_name': parameter['proposed_standard_name'],
             'data': parameter['value'],
             'units': parameter['units'],
             'long_name': parameter['long_name']}
    else:
        raise RuntimeError(f"Invalid parameter found {parameter[0]}")
    return d
def tr_index_function(name, no_thresholds,
var_name_string, standard_name_string,
value_string, units_string, relop_string):
var_names = split_parts(no_thresholds, var_name_string)
standard_names = split_parts(no_thresholds, standard_name_string)
values = split_parts(no_thresholds, value_string)
units = split_parts(no_thresholds, units_string)
parameters = []
for vn, sn, v, u in zip(var_names, standard_names, values, units):
parameters.append({
'var_name': vn,
'kind': 'quantity',
'standard_name': sn,
'data': v,
'units': u,
})
relops = tr_relops(no_thresholds, relop_string)
parameters.extend(relops)
def split_parameter_definitions(parameter_definitions_string, parameter_names):
    """Extract all parameter definitions from a definitions string.

    A definition has the form ``<name>: <reducer>``, ``<name>: <operator>`` or
    ``<name>: (var_name: ..., standard_name: ..., [proposed_standard_name:
    ..., ]value: ..., unit: ...[, long_name: <quoted text>])``, where
    ``<name>`` is one of ``parameter_names`` and the reducers and operators
    come from ``SUPPORTED_REDUCERS`` and ``SUPPORTED_OPERATORS``.

    Returns a list with one dict per definition found, as produced by
    ``tr_parameter``.
    """
    def alternation(literals):
        # The entries are literal text, so escape them. Longer entries go
        # first so that an entry which is a prefix of another one cannot cut
        # the longer one short (nothing anchors the end of an operator or
        # reducer in the pattern).
        ordered = sorted(literals, key=len, reverse=True)
        return '|'.join(re.escape(literal) for literal in ordered)

    name_regex = r'(?P<name>{})'.format(alternation(parameter_names))
    op_regex = r'(?P<operator>{})'.format(alternation(SUPPORTED_OPERATORS))
    red_regex = r'(?P<reducer>{})'.format(alternation(SUPPORTED_REDUCERS))
    # \p{Pi} and \p{Pf} (initial/final quotation punctuation) are Unicode
    # property classes; they need the third-party ``regex`` module, which
    # this file imports under the name ``re``.
    qty_regex = (
        r'\(var_name: (?P<var_name>[^,]*), '
        r'standard_name: (?P<standard_name>[^,]*), '
        r'(proposed_standard_name: (?P<proposed_standard_name>[^,]*), )?'
        r'value: (?P<value>[^,]*), '
        r'unit: (?P<units>[^,)]*)(, |\))'
        r'(long_name: \p{Pi}(?P<long_name>[^\p{Pf}]*)\p{Pf}\))?')
    param_regex = r'{}: (?:{}|{}|{})'.format(
        name_regex, red_regex, op_regex, qty_regex
    )
    matcher = re.compile(param_regex)
    return [tr_parameter(p)
            for p in matcher.finditer(parameter_definitions_string)]
def tr_index_function(index_name, name, no_thresholds,
                      parameter_names_string, parameter_definitions_string):
    """Build the index-function description for one index.

    ``parameter_names_string`` is split into ``no_thresholds`` names, which
    are then used to find the definitions in
    ``parameter_definitions_string``. If the names found in the definitions
    differ from the listed names, a warning mentioning ``index_name`` is
    logged; the result is returned regardless.

    Returns a dict with the keys ``name`` and ``parameters``.
    """
    parameter_names = split_parts(no_thresholds, parameter_names_string)
    parameters = split_parameter_definitions(parameter_definitions_string,
                                             parameter_names)
    found_parameters = set(p['var_name'] for p in parameters)
    if found_parameters != set(parameter_names):
        # logging.warn is a deprecated alias; logging.warning is the
        # supported spelling.
        logging.warning(
            f"For index {index_name}, the parameters listed in "
            f"parameter_name ({parameter_names}) are different from "
            f"those defined in PARAMETER_definition "
            f"({found_parameters}). Please check the table!")
    # The dict used to list 'name' twice; only the last entry of a duplicated
    # key survives, so the plain ``name`` is what was effectively stored.
    index_function = {
        'name': name,
        'parameters': parameters,
    }
    return index_function
......@@ -104,29 +121,25 @@ def tr_index_function(name, no_thresholds,
def prepare_record(record):
var_name = record['VarName']
no_parameters = int(record['N_parameters'])
no_inputs = int(record['N_inputs'])
d = {
'var_name': var_name,
'reference': record['OUTPUT_reference'],
'period': {'allowed': build_periods(record['freq']),
'period': {'allowed': build_periods(record['allowed_freq']),
'default': build_periods(record['default_freq'])[0]},
'output': {
'var_name': var_name,
'standard_name': record['OUTPUT_standard_name'],
'proposed_standard_name': record['OUTPUT_proposed_standard_name'],
'long_name': record['OUTPUT_long_name'],
'cell_methods': tr_cell_methods(record['OUTPUT_cell_methods']),
'units': record['OUTPUT_units'],
'units': record['OUTPUT_user_units'],
},
'inputs': tr_inputs(no_inputs,
record['INPUT_variable'],
record['INPUT_standard_name'],
record['INPUT_cell_methods']),
'inputs': tr_inputs(record['input']),
'index_function': tr_index_function(
var_name,
record['index_function'],
no_parameters,
record['PARAMETER_VarName'], record['PARAMETER_standard_name'],
record['PARAMETER_value'], record['PARAMETER_units'],
record['PARAMETER_relop']),
record['parameter_name'], record['PARAMETER_definition']),
'ET': {
'short_name': record['ET_short_name'],
'long_name': record['ET_long_name'],
......@@ -142,12 +155,41 @@ def prepare_record(record):
def build_index_definitions(file_name):
    """Read the ``index_definitions`` sheet and build the index definitions.

    The first row of the sheet provides the column names. Only rows whose
    ``ready`` column converts to the integer 1 are passed on to
    ``prepare_record``; rows where the conversion raises ``ValueError`` are
    skipped.
    """
    # Only the post-change lines are kept: the former 'index attr' sheet name
    # duplicated the sheet_name keyword, and the former bare comparison
    # ``record['ready'] != 1`` is superseded by the int() conversion below.
    sheet = pe.get_sheet(file_name=file_name,
                         sheet_name='index_definitions')
    sheet.name_columns_by_row(0)
    records = sheet.to_records()
    index_definitions = []
    for record in records:
        try:
            ready = int(record['ready'])
        except ValueError:
            # Cells that are not integer-like count as "not ready".
            ready = -1
        if ready != 1:
            continue
        index_definitions.append(prepare_record(record))
    return index_definitions
def prepare_variable_record(record):
    """Convert one row of the variables sheet into a variable-definition dict.

    ``record`` is accessed by column name. Text columns are stripped,
    ``cell_methods`` is parsed with ``tr_cell_methods`` and ``aliases`` is
    split at commas.
    """
    var_name = record['var_name'].strip()
    # Splitting an empty cell yields [''], and stray commas yield empty
    # entries as well; drop those so that no blank alias is emitted.
    aliases = [alias.strip()
               for alias in record['aliases'].split(',')
               if alias.strip()]
    d = {
        'var_name': var_name,
        'standard_name': record['standard_name'].strip(),
        'cell_methods': tr_cell_methods(record['cell_methods']),
        'aliases': aliases,
        'comment': record['comment'].strip()
    }
    return d
def build_variables(file_name):
    """Read the ``variables`` sheet and build the variable definitions.

    The first row of the sheet provides the column names. Rows whose
    ``var_name`` is blank are skipped; every other row is converted with
    ``prepare_variable_record``.
    """
    sheet = pe.get_sheet(file_name=file_name,
                         sheet_name='variables')
    sheet.name_columns_by_row(0)
    return [prepare_variable_record(row)
            for row in sheet.to_records()
            if row['var_name'].strip() != '']
indices:
{% for idx in indices %}
{{ idx.var_name }}:
reference: {{ idx.reference }}
reference: {{- idx.reference|add_space }}
period:
allowed:
{% for p in idx.period.allowed %}
{{ p }}:
{% endfor %}
default: {{ idx.period.default }}
default: {{- idx.period.default|add_space }}
output:
var_name: {{ idx.output.var_name }}
standard_name: {{ idx.output.standard_name }}
var_name: {{- idx.output.var_name|add_space(quote=True) }}
standard_name: {{- idx.output.standard_name|add_space }}
{% if idx.output.proposed_standard_name %}
proposed_standard_name: {{ idx.output.proposed_standard_name }}
proposed_standard_name: {{- idx.output.proposed_standard_name|add_space }}
{% endif %}
long_name: {{ idx.output.long_name }}
units: {{ idx.output.units }}
long_name: {{- idx.output.long_name|add_space(quote=True) }}
units: {{- idx.output.units|add_space(quote=True) }}
cell_methods:
{% for cm in idx.output.cell_methods %}
- {{ cm }}
{% endfor %}
input:
{% for input in idx.inputs %}
var_name: {{ input.var_name }}
standard_name: {{ input.standard_name }}
cell_methods:
{% for cm in input.cell_methods %}
- {{ cm }}
{% endfor %}
{% for argname, variable in idx.inputs.items() %}
{{ argname }}: {{- variable|add_space }}
{% endfor %}
index_function:
name: {{ idx.index_function.name }}
name: {{- idx.index_function.name|add_space }}
parameters:
{% for param in idx.index_function.parameters %}
{{ param.var_name }}:
kind: {{ param.kind }}
kind: {{- param.kind|add_space }}
{% if param.kind == 'quantity' %}
standard_name: {{ param.standard_name }}
data: {{ param.data }}
units: {{ param.units }}
standard_name: {{- param.standard_name|add_space }}
{% if param.proposed_standard_name is not none %}
proposed_standard_name: {{- param.proposed_standard_name|add_space }}
{% endif %}
{% if param.long_name is not none %}
long_name: {{- param.long_name|add_space(quote=True) }}
{% endif %}
data: {{- param.data|add_space }}
units: {{- param.units|add_space(quote=True) }}
{% elif param.kind == 'operator' %}
operator: '{{ param.operator }}'
operator: {{- param.operator|add_space(quote=True) }}
{% elif param.kind == 'reducer' %}
reducer: {{ param.reducer }}
reducer: {{- param.reducer|add_space }}
{% else %}
# Warning: Unknown kind!
{% endif %}
{% endfor %}
ET:
short_name: {{ idx.ET.short_name }}
long_name: {{ idx.ET.long_name }}
definition: {{ idx.ET.definition }}
comment: {{ idx.ET.comment }}
short_name: {{- idx.ET.short_name|add_space(quote=True) }}
long_name: {{- idx.ET.long_name|add_space(quote=True) }}
definition: {{- idx.ET.definition|add_space(quote=True) }}
comment: {{- idx.ET.comment|add_space(quote=True) }}
{% if not loop.last %}
{% endif %}
{% endfor %}
variables:
{% for var in variables %}
{{ var.var_name }}:
standard_name: {{ var.standard_name }}
cell_methods:
{% for cm in var.cell_methods %}
- {{ cm }}
{% endfor %}
aliases:
{% for alias in var.aliases %}
- {{ alias }}
{% endfor %}
{% if var.comment != '' %}
comment: {{ var.comment }}
{% endif %}
{% endfor %}
......@@ -39,6 +39,7 @@ setuptools.setup(
'numpy',