Commit a75001bb authored by Tomas Karlsson
Browse files

Initial version

parents
*.rpm
.project
.settings
.classpath
.pydevproject
*.pyc
*.pyo
from eccodes import codes_set,codes_write,codes_grib_new_from_file,codes_get_message_size,codes_release,codes_get_values,codes_new_from_samples,CODES_PRODUCT_GRIB,codes_set_values,codes_get
from time import time
from pyexcel import save_as
from subprocess import call
from os import makedirs
from os.path import isdir
from shutil import rmtree
from json import dump,load
from sys import stderr
class GribCompressionBenchmark(object):
    """Benchmark GRIB packing types through the eccodes Python API.

    The base fields are read from ``testdata<testdataBits>.grib1`` in the
    current directory.  ``compression()`` writes every base field once per
    enabled (edition, packing type, bits per value) combination into the
    temporary folder, ``decompression()`` reads those files back and
    compares them with the base fields, and ``saveStats()`` exports the
    collected figures.

    Each entry of ``self.stats`` is a list laid out as
    ``[prefix, packingType, bitsPerValue, gribEdition, gribSize,
    compressionTime, decompressionTime, maxAbsoluteError, noErrorValues]``
    (times in milliseconds).
    """

    # Packing types that createGRIB() encodes without setting bitsPerValue;
    # they are only exercised once, under the nominal 24-bit label.
    _NO_BITS_PER_VALUE_TYPES = ('grid_ccsds', 'grid_png', 'grid_jpeg', 'grid_ieee')

    # Values recorded as maxAbsoluteError / noErrorValues when a file could
    # not be verified at all.
    _FAILED_MAX_ERROR = 999999.9
    _FAILED_COUNT = 999999999

    def __init__(self, testdataBits, tempFolder, testType):
        """Prepare an empty temp folder and load the base fields.

        testdataBits -- selects the input file ``testdata<bits>.grib1`` and
                        is part of the stats file name.
        tempFolder   -- folder for the generated GRIB files; it is removed
                        (if present) and recreated.
        testType     -- free-text label used in the stats file name.

        Raises OSError when the temp folder cannot be created and IOError
        when the test data file cannot be opened.
        """
        self.testdataBits = testdataBits
        self.testType = testType
        self.stats = {}
        self.bitsPerValues = (24, 18, 16, 12)
        # Index 0 holds the packing types used for GRIB 1, index 1 those for
        # GRIB 2.  Commented-out entries are known candidates that are
        # currently disabled.
        self.packingTypes = (
            (  # GRIB 1
                #'grid_ccsds',
                'grid_ieee',
                #'grid_jpeg',
                #'grid_png',
                'grid_second_order',
                #'grid_second_order_SPD1',
                #'grid_second_order_SPD2',
                #'grid_second_order_SPD3',
                #'grid_second_order_boustrophedonic',
                #'grid_second_order_constant_width',
                #'grid_second_order_general_grib1',
                #'grid_second_order_no_SPD',
                #'grid_second_order_no_boustrophedonic',
                #'grid_second_order_row_by_row',
                'grid_simple',
                #'grid_simple_log_preprocessing',
                'grid_simple_matrix',
                #'spectral_complex',
                #'spectral_ieee',
                #'spectral_simple'
            ),
            (  # GRIB 2
                'grid_ccsds',
                #'grid_complex',
                #'grid_complex_spatial_differencing',
                'grid_ieee',
                'grid_jpeg',
                'grid_png',
                'grid_second_order',
                'grid_second_order_SPD1',
                'grid_second_order_SPD2',
                #'grid_second_order_SPD3', Crash on decoding
                'grid_second_order_boustrophedonic',
                'grid_second_order_constant_width',
                #'grid_second_order_general_grib1',
                'grid_second_order_no_SPD',
                'grid_second_order_no_boustrophedonic',
                'grid_second_order_row_by_row',
                'grid_simple',
                #'grid_simple_log_preprocessing',
                'grid_simple_matrix',
                #'spectral_complex',
                #'spectral_ieee',
                #'spectral_simple'
            ),
        )
        self.temp_folder = tempFolder
        try:
            if isdir(self.temp_folder):
                rmtree(self.temp_folder)
            makedirs(self.temp_folder)
        except OSError:
            # Tolerate the error as long as the folder exists afterwards.
            if not isdir(self.temp_folder):
                raise
        self.baseFields = []
        # GRIB is a binary format, so the file is opened in binary mode.
        with open('testdata%d.grib1' % (self.testdataBits), 'rb') as fin:
            while True:
                gid = codes_grib_new_from_file(fin)
                if gid is None:
                    break
                try:
                    self.baseFields.append(codes_get_values(gid))
                finally:
                    codes_release(gid)

    def _gribPath(self, prefix, gribEdition, packingType, bitsPerValue):
        """Return the temp-folder path of the GRIB file for one test case."""
        return '%s/%s-%s-%02d.grib%d' % (
            self.temp_folder, prefix, packingType, bitsPerValue, gribEdition)

    def _testCases(self):
        """Yield (index, gribEdition, packingType, bitsPerValue, values) per enabled case."""
        for gribEdition in (1, 2):
            for packingType in self.packingTypes[gribEdition - 1]:
                for bitsPerValue in self.bitsPerValues:
                    if (packingType in self._NO_BITS_PER_VALUE_TYPES
                            and bitsPerValue != 24):
                        continue
                    for i, values in enumerate(self.baseFields):
                        yield i, gribEdition, packingType, bitsPerValue, values

    def _statRow(self, i, packingType, bitsPerValue, gribEdition):
        """Return the stats row of one test case, creating a zeroed row if missing."""
        statKey = '%d-%s-%02d-%d' % (i, packingType, bitsPerValue, gribEdition)
        return self.stats.setdefault(
            statKey,
            [i, packingType, bitsPerValue, gribEdition, 0, 0.0, 0.0, 0.0, 0])

    def createGRIB(self, prefix, gribEdition, packingType, bitsPerValue, values):
        """Encode ``values`` as one GRIB message and write it to the temp folder.

        prefix       -- first component of the output file name (the base
                        field index in the benchmark loops).
        gribEdition  -- 1 or 2; selects the ``GRIB<edition>`` sample.
        packingType  -- eccodes packing type name.
        bitsPerValue -- requested bits per value; only applied for packing
                        types whose name starts with ``grid_s``.
        values       -- the field values to encode.

        Returns ``(elapsed_ms, message_size)`` where the elapsed time covers
        everything from creating the sample to closing the output file.
        """
        start = time()
        gid = codes_new_from_samples('GRIB%d' % (gribEdition), CODES_PRODUCT_GRIB)
        try:
            codes_set(gid, 'shortName', 't')
            codes_set(gid, 'level', 2)
            codes_set(gid, 'dataDate', 20180822)
            codes_set(gid, 'dataTime', 1200)
            codes_set(gid, 'stepUnits', 1)
            codes_set(gid, 'stepRange', 1)
            codes_set(gid, 'startStep', 1)
            codes_set(gid, 'endStep', 1)
            codes_set(gid, 'gridType', 'lambert')
            codes_set(gid, 'Nx', 889)
            codes_set(gid, 'Ny', 949)
            codes_set(gid, 'latitudeOfFirstGridPointInDegrees', 51.849)
            codes_set(gid, 'longitudeOfFirstGridPointInDegrees', 0.238)
            codes_set(gid, 'LoVInDegrees', 15)
            codes_set(gid, 'DxInMetres', 2500)
            codes_set(gid, 'DyInMetres', 2500)
            codes_set(gid, 'iScansNegatively', 0)
            codes_set(gid, 'jScansPositively', 1)
            codes_set(gid, 'jPointsAreConsecutive', 0)
            codes_set(gid, 'Latin1InDegrees', 63)
            codes_set(gid, 'Latin2InDegrees', 63)
            # The order of the next calls is deliberate and differs per
            # packing family; do not reorder them.
            if str(packingType).startswith('grid_s'):
                codes_set(gid, 'bitsPerValue', bitsPerValue)
                codes_set(gid, 'packingType', packingType)
                codes_set_values(gid, values)
            else:
                codes_set_values(gid, values)
                codes_set(gid, 'packingType', packingType)
            path = self._gribPath(prefix, gribEdition, packingType, bitsPerValue)
            # Binary mode: a GRIB message is raw bytes, not text.
            with open(path, 'wb') as f:
                codes_write(gid, f)
            size = codes_get_message_size(gid)
        finally:
            codes_release(gid)
        end = time()
        return ((end - start) * 1000.0, size)

    def compression(self):
        """Write every enabled test case and record its size and encoding time."""
        print('\n#################################################################')
        print('###################### COMPRESSION ##############################')
        print('#################################################################\n')
        # Make dummy writes to "warm up" eccodes (skipped when there is no
        # base field to write).
        if self.baseFields:
            for gribEdition in (1, 2):
                self.createGRIB(0, gribEdition, 'grid_simple', 24, self.baseFields[0])
        for i, gribEdition, packingType, bitsPerValue, values in self._testCases():
            (elapsed, size) = self.createGRIB(i, gribEdition, packingType, bitsPerValue, values)
            print('%d,%s,%02d,%d,%d,%f' % (gribEdition, packingType, bitsPerValue, i, size, elapsed))
            row = self._statRow(i, packingType, bitsPerValue, gribEdition)
            row[4] = size
            row[5] = elapsed

    def verifyGRIB(self, prefix, gribEdition, packingType, bitsPerValue, values):
        """Read one generated GRIB file back and compare it with ``values``.

        Mismatches in bitsPerValue, packingType or number of values are
        reported on stderr.  Returns ``(elapsed_ms, max_error, failCount)``:
        the time spent reading and decoding, the largest absolute difference
        and the number of values whose difference is not below the threshold
        for ``bitsPerValue``.  When the file holds fewer values than expected
        no comparison is made and the failure markers are returned instead.

        Raises IOError when the file is missing or contains no GRIB message,
        and KeyError for a ``bitsPerValue`` without a defined threshold.
        """
        thresholds = {32: 0.000000009, 24: 0.000009, 18: 0.00009, 16: 0.0009, 12: 0.009}
        start = time()
        path = self._gribPath(prefix, gribEdition, packingType, bitsPerValue)
        with open(path, 'rb') as f:
            gid = codes_grib_new_from_file(f)
        if gid is None:
            raise IOError('no GRIB message found in %s' % (path))
        try:
            encValues = codes_get_values(gid)
            bitsPerValue_e = codes_get(gid, 'bitsPerValue')
            packingType_e = codes_get(gid, 'packingType')
        finally:
            codes_release(gid)
        end = time()
        if not bitsPerValue == bitsPerValue_e:
            stderr.write('---------------- bits per value does not match bitsPerValue=%d bitsPerValue_e=%d\n' % (bitsPerValue, bitsPerValue_e))
        if not packingType == packingType_e:
            stderr.write('---------------- packingType does not match packingType=%s packingType_e=%s\n' % (packingType, packingType_e))
        if len(values) > len(encValues):
            stderr.write('---------------- no values does not match len values=%d len encValues=%d\n' % (len(values), len(encValues)))
            return ((end - start) * 1000.0, self._FAILED_MAX_ERROR, self._FAILED_COUNT)
        elif not len(values) == len(encValues):
            stderr.write('---------------- no values does not match len values=%d len encValues=%d\n' % (len(values), len(encValues)))
        threshold = thresholds[bitsPerValue]
        failCount = 0
        max_error = 0.0
        # values is never longer than encValues here, so zip() compares
        # exactly the first len(values) decoded values.
        for expected, decoded in zip(values, encValues):
            error = abs(decoded - expected)
            max_error = max(max_error, error)
            # "not <" rather than ">=" so that a NaN difference counts as a failure.
            if not error < threshold:
                failCount += 1
        return ((end - start) * 1000.0, max_error, failCount)

    def decompression(self):
        """Verify every enabled test case and record decoding time and errors.

        A case whose verification raises is reported and recorded with the
        failure markers instead of aborting the run or inheriting the
        results of the previous case.
        """
        print('\n#################################################################')
        print('##################### DECOMPRESSION #############################')
        print('#################################################################\n')
        for i, gribEdition, packingType, bitsPerValue, values in self._testCases():
            try:
                (elapsed, max_error, failCount) = self.verifyGRIB(i, gribEdition, packingType, bitsPerValue, values)
            except Exception as e:
                print(e)
                (elapsed, max_error, failCount) = (0.0, self._FAILED_MAX_ERROR, self._FAILED_COUNT)
            print('%d,%s,%02d,%d,%f,%0.15f,%d' % (gribEdition, packingType, bitsPerValue, i, elapsed, max_error, failCount))
            row = self._statRow(i, packingType, bitsPerValue, gribEdition)
            row[6] = elapsed
            row[7] = max_error
            row[8] = failCount

    def saveStats(self, fileType):
        """Export the collected stats as a sheet named ``details``.

        fileType -- file extension handed to pyexcel (``'ods'`` in the
                    script's own run).

        The output file is
        ``stats_<testType>_<testdataBits>bit_base_fields.<fileType>``.  As a
        side effect ``x.json`` is (re)written in the current directory.
        """
        stats_ar = []
        stats_ar.append(['prefix', 'packingType', 'bitsPerValue', 'gribEdition', 'gribSize', 'compressionTime', 'decompressionTime', 'maxAbsoluteError', 'noErrorValues'])
        stats_ar.extend(self.stats.values())
        # Round trip through JSON before handing the rows to pyexcel --
        # presumably to reduce every cell to a plain Python type; TODO confirm.
        with open('x.json', 'w') as fp:
            dump(stats_ar, fp)
        with open('x.json') as fp:
            stats_ar = load(fp)
        save_as(array=stats_ar, dest_file_name='stats_%s_%dbit_base_fields.%s' % (self.testType, self.testdataBits, fileType), dest_sheet_name='details')

    def cleanup(self):
        """Remove the temp folder together with all generated GRIB files."""
        rmtree(self.temp_folder)
if __name__ == '__main__':
    # Run the full benchmark once per (temp folder, label) pair and once per
    # test data file (testdata16.grib1 and testdata24.grib1).  The label ends
    # up in the name of the exported stats file.
    for (tempFolder, testType) in (('tmp_folder', 'linda_ssd'), ('/data/proj/arkitekt/tomas/gcb/tmp_folder', 'linda_fileserver')):
        for bits in (16, 24):
            gcb = GribCompressionBenchmark(bits, tempFolder, testType)
            gcb.compression()
            # Ask the kernel to drop its caches between the write and the
            # read phase (needs sudo rights) -- presumably so that the
            # decompression timings include reading from storage.
            status = call(['/usr/bin/sudo', '/sbin/sysctl', '-w', 'vm.drop_caches=3'])
            if status != 0:
                # Keep going, but make it visible that the timings of this
                # run were taken without a successful cache drop.
                stderr.write('---------------- dropping caches failed with exit status %d, decompression timings may be affected\n' % (status))
            gcb.decompression()
            gcb.saveStats('ods')
            gcb.cleanup()
# Installing:
No installation of the benchmark is needed; just clone the repo and run it.
The benchmark depends on eccodes (tested with version 2.8.2) and pyexcel.
To install pyexcel:
pip install pyexcel --user
pip install pyexcel-ods
# Running:
python GribCompressionBenchmark.py
# Test data
```
grib_ls -p shortName,typeOfLevel,level,bitsPerValue,packingType,standardDeviation,average,maximum,minimum *.grib1
testdata16.grib1
shortName typeOfLevel level bitsPerValue packingType standardDeviation average maximum minimum
10u heightAboveGround 10 16 grid_simple 3.12074 0.563963 10.5736 -10.6183
10v heightAboveGround 10 16 grid_simple 3.11777 -0.388955 11.1943 -11.416
2t heightAboveGround 2 16 grid_simple 4.34417 286.038 299.839 271.77
pres heightAboveSea 0 16 grid_simple 318.42 100881 101489 99349.9
tcc heightAboveGround 0 16 grid_simple 0.327711 0.793296 1 0
5 of 5 messages in testdata16.grib1
testdata24.grib1
cape heightAboveGround 0 24 grid_simple 16.8571 8.10925 331.413 0
levmaxCAT entireAtmosphere 0 24 grid_simple 2354.19 8782.11 12192 4572
tcc heightAboveGround 75 24 grid_simple 0.450249 0.545392 1 0
3 of 3 messages in testdata24.grib1
8 of 8 total messages in 2 files
```
# Notes on quirks
eccodes seems highly dependent on the order in which metadata and packing parameters are set.
This is a non-comprehensive list of findings from experimentation; it should not be interpreted as the "truth",
but rather as input to your own testing.
Always inspect the metadata and test the truncation/accuracy of the resulting GRIB files.
## grid_ccsds
Grib 2
Only 24 bit; avoid setting bitsPerValue
Order: values,packingType
## grid_jpeg
Grib 2
Only 24 bit; avoid setting bitsPerValue
Order: values,packingType
## grid_complex
Grib 1,2
crash during encoding
## grid_complex_spatial_differencing
Grib 1,2
destroys data
## grid_ieee
Grib 1,2
Order: values,packingType
Do not set bitsPerValue; doing so destroys data
huge files/high accuracy
## grid_png
Grib 2
Only 24 bit; avoid setting bitsPerValue
Order: values,packingType
## grid_second_order
Grib 1,2
Order: bitsPerValue,packingType,values
Works; testing truncation is recommended when a lower number of bits is used
There are several variants of second-order packing; they seem to behave similarly
## grid_simple grib 1,2
Order: bitsPerValue,packingType,values #Any other order will produce only 24-bit files
Works; testing truncation is recommended when a lower number of bits is used
## grid_simple_matrix grib 1,2
Order: bitsPerValue,packingType,values #Any other order will produce only 24-bit files
Works; testing truncation is recommended when a lower number of bits is used
## grid_simple_log_preprocessing
Segfaults, at least for GRIB 2; in GRIB 1, eccodes seems to auto-select grid_simple
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment