Commit 1710b4d2 authored by Mattias Jakobsson's avatar Mattias Jakobsson
Browse files

Merge branch 'release/0.1' into 'master'

Release of version 0.1.0

First alpha release of pupygrib.

See merge request !1
parents 508d44ee a6567736
include LICENSE.txt
include .pypirc
include tox.ini
recursive-include tests *.py
recursive-include tests/data *
......@@ -3,18 +3,107 @@
Pupygrib (pronounced *puppy grib* and short for PUre PYthon GRIB) is a
light-weight pure Python GRIB reader. It's purpose is to extract data
from GRIB files with as little effort as possible. The user can then
freely choose how to further process the data.
freely choose how to further process the data (*read: pupygrib will
not help you*).
## Features
This project is still in the planning phase (as in no actual code
exists). The planned features are
This project is in the alpha phase, which means that many planned
features are missing and implemented features are not well tested. It
also means that the API may change in future version. The implemented
features are:
* Iterate over and extract the raw fields from GRIB edition 1 messages
in a file.
* Extract simply packed grid-point data values from GRIB edition 1
* Extract the coordinates for these values if they are on a
latitude/longitude grid.
The planned features are:
* Iterate over edition 1 or 2 GRIB messages in a file.
* Be able to easily identify (filter) the messages.
* Be able to extract the data values from a message.
* Be able to extract the coordinates of the data values.
* Be able to extract the data values for other packings.
* Be able to extract the coordinates of other grid types.
* Support for GRIB edition 2.
## Requirements
* [Python]( 2.7, 3.4 or 3.5. Earlier Python 3
versions may work, but they are not tested.
* [Numpy](
* [Six](
## Installation
$ pip install pupygrib
For development, the following command will install all extra
requirements, including test requirements:
$ pip install pupygrib[dev,test]
## Usage
To use pupygrib, you will need a good understanding of the GRIB
format, especially since table lookups are not yet implemented. ECMWF
provides an overview of GRIB
[edition 1](
[edition 2](
that can be used as references.
Iterate over the messages in a GRIB file and extract the coordinates
and values:
>>> import pupygrib
>>> with open('tests/data/regular_latlon_surface.grib1', 'rb') as stream:
... for i, msg in enumerate(, 1):
... lons, lats = msg.get_coordinates()
... values = msg.get_values()
... print("Message {}: {:.3f} {}".format(i, values.mean(), lons.shape))
Message 1: 291.585 (31, 16)
Access a section of a GRIB message and print its fields:
>>> with open('tests/data/regular_latlon_surface.grib1', 'rb') as stream:
... msg, =
>>> sorted(msg[0].fieldnames) # fieldnames is a set, so we can't trust the order
['editionNumber', 'identifier', 'totalLength']
>>> msg[0].totalLength
>>> msg[3] is None # the bit-map section is not included in this message
## Development
Pull requests are most welcome! I do ask that you add test cases and
update the documentation for any new features. Make sure that the
following checks (coding style and unit tests, respectively) pass
without any warnings or errors. [Tox](
should have at least one supported Python 2 and Python 3 version
$ flake8
$ tox
## License
"""A light-weight pure Python GRIB reader."""
from __future__ import unicode_literals
from itertools import repeat, takewhile
import pkg_resources
from six.moves import map
from pupygrib import binary
from pupygrib import edition1
from pupygrib import edition2
from pupygrib.exceptions import ParseError
__version__ = pkg_resources.get_distribution('pupygrib').version
def _try_read_message(stream):
# Try to read one GRIB message from *stream*. If end-of-file is
# encountered immediately, return None. If end-of-file is reached
# while parsing the message or if the parsing fails for some other
# reason, a ParseError is raised.
# Check that we have a GRIB message
startpos = stream.tell()
magic =
if not magic:
return None
if magic != b'GRIB':
raise ParseError("not a GRIB message")
# Find the edition and length
header = binary.checkread(stream, 4)
edition = binary.unpack_uint8_from(header, 3)
if edition == 1:
length = binary.unpack_uint24_from(header)
message_class = edition1.Message
elif edition == 2:
length = binary.unpack_uint64_from(binary.checkread(stream, 8))
message_class = edition2.Message
raise ParseError("unknown edition number '{}'".format(edition))
# Read and check the end of the message
data = binary.checkread(stream, length)
if data[-4:] != b'7777':
raise ParseError("end-of-message marker not found")
# Create and return the message instance
return message_class(data)
def read(stream):
"""Iterate over a GRIB file's messages.
*stream* should be a readable binary file-like object that
supports random access. The easiest way to obtain such an object
is to use the built-in open() function with mode set to 'rb'.
return takewhile(lambda msg: msg, map(_try_read_message, repeat(stream)))
"""Base class for GRIB messages of edition 1 and 2."""
from __future__ import unicode_literals
import itertools
import six
from pupygrib.fields import Field
class SectionMeta(type):
"""Meta class for GRIB message sections.
This meta class automatically assign names to fields from the
attribute name they are given on the class.
def __init__(cls, name, bases, namespace, **kwargs):
fieldnames = set(itertools.chain.from_iterable(
base.fieldnames for base in bases if hasattr(base, 'fieldnames')
for key, value in six.iteritems(namespace):
if isinstance(value, Field): = key
cls.fieldnames = fieldnames
class Section(object):
"""Base class for sections of GRIB messages.
The *data* argument should be a memoryview of the whole GRIB
message. *offset* and *length* gives the slice that belongs to
this section of that view.
def __init__(self, data, offset, length):
self.offset = offset
self.end = offset + length
self._data = data[self.offset:self.end]
class Message(object):
"""Base class for GRIB messages of edition 1 and 2.
The *data* argument should be a bytes-like object containing the
whole GRIB message.
def __init__(self, data):
self._data = memoryview(data)
self._sections = {}
def __getitem__(self, index):
"""Return a section of the GRIB message with the given *index*.
If *index* is a valid section for the current GRIB edition but
not included in the message, None is returned.
return self._sections[index]
except KeyError:
getter = getattr(self, '_get_section{}'.format(index))
except AttributeError:
raise IndexError("no such section")
self._sections[index] = section = getter()
return section
"""Functions to handle binary data."""
from __future__ import unicode_literals
import struct
from pupygrib.exceptions import ParseError
_uint8struct = struct.Struct(b'>B')
_uint16struct = struct.Struct(b'>H')
_uint24struct = struct.Struct(b'>HB')
_uint32struct = struct.Struct(b'>I')
_uint64struct = struct.Struct(b'>Q')
error = struct.error
def unpack_grib1float_from(buf, offset=0):
"""Unpack a 32-bit GRIB 1 floating point value from *buf* at *offset*.
GRIB edition 1 does not use the IEEE 754 floating point standard.
Instead, it uses a leading sign bit (s), a 7-bit exponent (A), and
a 24-bit significand (B). The represented value is then given by
R = (-1)^s * 2^-24 * B * 16^(A - 64)
i, = _uint32struct.unpack_from(buf, offset)
s = i >> 31
A = (i & 0x7fffffff) >> 24
B = i & 0x00ffffff
return (-1)**s * _grib1_float_magic * B * 16**(A - 64)
_grib1_float_magic = 2**-24
def unpack_int8_from(buf, offset=0):
"""Unpack an 8-bit signed magnitude integer from *buf* at *offset*."""
b, = _uint8struct.unpack_from(buf, offset)
return (-1)**(b >> 7) * (b & 0x7f)
def unpack_int16_from(buf, offset=0):
"""Unpack a 16-bit signed magnitude integer from *buf* at *offset*."""
h, = _uint16struct.unpack_from(buf, offset)
return (-1)**(h >> 15) * (h & 0x7fff)
def unpack_int24_from(buf, offset=0):
"""Unpack a 24-bit signed magnitude integer from *buf* at *offset*."""
h, b = _uint24struct.unpack_from(buf, offset)
return (-1)**(h >> 15) * (((h & 0x7fff) << 8) + b)
def unpack_uint8_from(buf, offset=0):
"""Unpack an 8-bit unsigned integer from *buf* at *offset*."""
return _uint8struct.unpack_from(buf, offset)[0]
def unpack_uint16_from(buf, offset=0):
"""Unpack a 16-bit unsigned integer from *buf* at *offset*."""
return _uint16struct.unpack_from(buf, offset)[0]
def unpack_uint24_from(buf, offset=0):
"""Unpack a 24-bit unsigned integer from *buf* at *offset*."""
h, b = _uint24struct.unpack_from(buf, offset)
return (h << 8) + b
def unpack_uint32_from(buf, offset=0):
"""Unpack a 32-bit unsigned integer from *buf* at *offset*."""
return _uint32struct.unpack_from(buf, offset)[0]
def unpack_uint64_from(buf, offset=0):
"""Unpack a 64-bit unsigned integer from *buf* at *offset*."""
return _uint64struct.unpack_from(buf, offset)[0]
def checkread(stream, n):
"""Read exactly *n* bytes from *stream*.
A ParseError is raised if less than *n* bytes are available.
data =
if len(data) < n:
raise ParseError('unexpected end of file')
return data
"""An edition 1 GRIB message."""
from __future__ import unicode_literals
import numpy
from pupygrib import base
from pupygrib import binary
from pupygrib import fields as basefields # Python gets confused without alias
from pupygrib.edition1 import section1
from pupygrib.edition1 import section2
from pupygrib.edition1 import section3
from pupygrib.edition1 import section4
class IndicatorSection(base.Section):
"""The indicator section (0) of an edition 1 GRIB message."""
identifier = basefields.BytesField(1, 4)
totalLength = basefields.Uint24Field(5)
editionNumber = basefields.Uint8Field(8)
class EndSection(base.Section):
"""The end section (5) of an edition 1 GRIB message."""
endOfMessage = basefields.BytesField(1, 4)
class Message(base.Message):
"""An edition 1 GRIB message."""
def get_coordinates(self):
"""Return the coordinates of this message's data points."""
griddesc = self[2]
if griddesc is None:
raise NotImplementedError(
"pupygrib does not support catalogued grids"
return griddesc._get_coordinates()
def get_values(self):
"""Return the data values of this message."""
values = self[4]._unpack_values()
values = self[1]._scale_values(values)
bitmapdesc = self[3]
if bitmapdesc:
mask = ~numpy.array(bitmapdesc.bitmap, dtype=bool)
mvalues = numpy.empty(len(mask), dtype=float)
mvalues[~mask] = values
values =, mask=mask)
griddesc = self[2]
if griddesc:
values = griddesc._order_values(values)
return values
def _get_section0(self):
return IndicatorSection(self._data, 0, 8)
def _get_section1(self):
offset = self[0].end
length = binary.unpack_uint24_from(self._data, offset)
return section1.get_section(self._data, offset, length)
def _get_section2(self):
proddef = self[1]
if not proddef.section1Flags & 0x80:
return None
offset = proddef.end
length = binary.unpack_uint24_from(self._data, offset)
return section2.get_section(self._data, offset, length)
def _get_section3(self):
proddef = self[1]
if not proddef.section1Flags & 0x40:
return None
offset = (self[2] or proddef).end
length = binary.unpack_uint24_from(self._data, offset)
return section3.BitMapSection(self._data, offset, length)
def _get_section4(self):
offset = (self[3] or self[2] or self[1]).end
length = binary.unpack_uint24_from(self._data, offset)
return section4.get_section(self._data, offset, length)
def _get_section5(self):
return EndSection(self._data, self[4].end, 4)
"""Fields for edition 1 GRIB messages."""
from __future__ import unicode_literals
from pupygrib import base
from pupygrib import binary
class FloatField(base.Field):
"""A 32-bit GRIB 1 floating point field."""
def get_value(self, section, offset):
return binary.unpack_grib1float_from(section._data, offset)
"""Product definition sections of edition 1 GRIB messages."""
from __future__ import unicode_literals
import numpy
from pupygrib import base
from pupygrib import binary
from pupygrib import fields
class LevelField(base.Field):
"""Height, pressure, etc. of levels."""
split_level_types = {
101, 104, 106, 108, 110, 112, 114, 116, 120, 121, 128, 141
def get_value(self, section, offset=10):
if section.indicatorOfTypeOfLevel in self.split_level_types:
l1 = binary.unpack_uint8_from(section._data, offset)
l2 = binary.unpack_uint8_from(section._data, offset + 1)
return (l1, l2)
return binary.unpack_uint16_from(section._data, offset)
class ProductDefinitionSection(base.Section):
"""The product definition section (1) of an edition 1 GRIB message."""
section1Length = fields.Uint24Field(1)
table2Version = fields.Uint8Field(4)
centre = fields.Uint8Field(5)
generatingProcessIdentifier = fields.Uint8Field(6)
gridDefinition = fields.Uint8Field(7)
section1Flags = fields.Uint8Field(8)
indicatorOfParameter = fields.Uint8Field(9)
indicatorOfTypeOfLevel = fields.Uint8Field(10)
level = LevelField(11)
yearOfCentury = fields.Uint8Field(13)
month = fields.Uint8Field(14)
day = fields.Uint8Field(15)
hour = fields.Uint8Field(16)
minute = fields.Uint8Field(17)
unitOfTimeRange = fields.Uint8Field(18)
P1 = fields.Uint8Field(19)
P2 = fields.Uint8Field(20)
timeRangeIndicator = fields.Uint8Field(21)
numberIncludedInAverage = fields.Uint16Field(22)
numberMissingFromAveragesOrAccumulations = fields.Uint8Field(24)
centuryOfReferenceTimeOfData = fields.Uint8Field(25)
subCentre = fields.Uint8Field(26)
decimalScaleFactor = fields.Int16Field(27)
def _scale_values(self, values):
return 10**-self.decimalScaleFactor * values
class LocalProductDefinitionSection(ProductDefinitionSection):
"""A local product definition section if an edition 1 GRIB message."""
localDefinitionNumber = fields.Uint8Field(41)
class MatchV1ProductSection(LocalProductDefinitionSection):
"""A MATCH v1.0 product definition section of a GRIB 1 message."""
generatingProcess = fields.Uint8Field(42)
sort = fields.Uint8Field(43)
timeRepres = fields.Uint8Field(44)
landType = fields.Uint8Field(45)
suplScale = fields.Int16Field(46)
molarMass = fields.Uint16Field(48)
logTransform = fields.Uint8Field(50)
threshold = fields.Int16Field(51)
totalSizeClasses = fields.Uint8Field(60)
sizeClassNumber = fields.Uint8Field(61)
integerScaleFactor = fields.Int8Field(62)
lowerRange = fields.Uint16Field(63)
upperRange = fields.Uint16Field(65)
meanSize = fields.Uint16Field(67)
STDV = fields.Uint16Field(69)
def _scale_values(self, values):
values = super(MatchV1ProductSection, self)._scale_values(values)
if self.logTransform:
values = numpy.exp(values)
return values
def get_section(buf, offset, length):
"""Return a new section 1 of the correct type from *buf* at *offset*."""
if length <= 40:
return ProductDefinitionSection(buf, offset, length)
proddef = LocalProductDefinitionSection(buf, offset, length)
sectionclass = {
(82, 0, 2): MatchV1ProductSection,
}[(proddef.centre, proddef.subCentre, proddef.localDefinitionNumber)]
except KeyError:
return proddef
return sectionclass(buf, offset, length)
"""Grid description sections of edition 1 GRIB messages."""
from __future__ import unicode_literals
import numpy
from pupygrib import base
from pupygrib import fields
from pupygrib.edition1.fields import FloatField
class GridDescriptionSection(base.Section):
"""The grid description section (2) of an edition 1 GRIB message."""
section2Length = fields.Uint24Field(1)
numberOfVerticalCoordinateValues = fields.Uint8Field(4)
pvlLocation = fields.Uint8Field(5)
dataRepresentationType = fields.Uint8Field(6)
def _order_values(self, values):
return numpy.asanyarray(values)
def _get_coordinates(self):
raise NotImplementedError(
"pupygrib does not support grids with data reporesentation type "
class LatitudeLongitudeGridSection(GridDescriptionSection):
"""A latitude/longitude grid section (2) of an edition 1 GRIB message."""
Ni = fields.Uint16Field(7)
Nj = fields.Uint16Field(9)
latitudeOfFirstGridPoint = fields.Int24Field(11)
longitudeOfFirstGridPoint = fields.Int24Field(14)
resolutionAndComponentFlags = fields.Uint8Field(17)