source: flex_extract.git/Source/Python/Classes/MarsRetrieval.py @ 6857073

dev
Last change on this file since 6857073 was 6857073, checked in by Anne Tipka <anne.philipp@…>, 22 months ago

spelling corrections

  • Property mode set to 100644
File size: 24.0 KB
Line 
1#!/usr/bin/env python3
2# -*- coding: utf-8 -*-
3#*******************************************************************************
4# @Author: Anne Fouilloux (University of Oslo)
5#
6# @Date: October 2014
7#
8# @Change History:
9#
10#   November 2015 - Leopold Haimberger (University of Vienna):
11#        - optimized display_info
12#        - optimized data_retrieve and separate between python and shell
13#          script call
14#
15#   February 2018 - Anne Philipp (University of Vienna):
16#        - applied PEP8 style guide
17#        - added documentation
18#        - applied some minor modifications in programming style/structure
19#        - added writing of mars request attributes to a csv file
20#
21# @License:
22#    (C) Copyright 2014-2020.
23#    Anne Philipp, Leopold Haimberger
24#
25#    SPDX-License-Identifier: CC-BY-4.0
26#
27#    This work is licensed under the Creative Commons Attribution 4.0
28#    International License. To view a copy of this license, visit
29#    http://creativecommons.org/licenses/by/4.0/ or send a letter to
30#    Creative Commons, PO Box 1866, Mountain View, CA 94042, USA.
31#*******************************************************************************
32
33# ------------------------------------------------------------------------------
34# MODULES
35# ------------------------------------------------------------------------------
36from __future__ import print_function
37
38import os
39import sys
40import subprocess
41import traceback
42
43# software specific classes and modules from flex_extract
44#pylint: disable=wrong-import-position
45sys.path.append('../')
46import _config
47#pylint: disable=invalid-name
48try:
49    ec_api = True
50    import ecmwfapi
51except ImportError:
52    ec_api = False
53
54try:
55    cds_api = True
56    import cdsapi
57except ImportError:
58    cds_api = False
59#pylint: enable=invalid-name
60#pylint: enable=wrong-import-position
61# ------------------------------------------------------------------------------
62# CLASS
63# ------------------------------------------------------------------------------
class MarsRetrieval(object):
    '''Specific syntax and content for submission of MARS retrievals.

    A MARS retrieval has a specific syntax with a selection of keywords and
    their corresponding values. This class provides the necessary functions
    by displaying the selected parameters and their values and the actual
    retrieval of the data through a mars request or a Python web api
    interface. The initialization already expects all the keyword values.

    A description of MARS keywords/arguments and examples of their
    values can be found here:
    https://software.ecmwf.int/wiki/display/UDOC/\
                   Identification+keywords#Identificationkeywords-class

    Attributes
    ----------
    server : ECMWFService or ECMWFDataServer
        This is the connection to the ECMWF data servers.

    public : int
        Decides which Web API Server version is used.

    marsclass : str, optional
        Characterisation of dataset.

    dataset : str, optional
        For public datasets there is the specific naming and parameter
        dataset which has to be used to characterize the type of
        data.

    type : str, optional
        Determines the type of fields to be retrieved.

    levtype : str, optional
        Denotes type of level.

    levelist : str, optional
        Specifies the required levels.

    repres : str, optional
        Selects the representation of the archived data.

    date : str, optional
        Specifies the Analysis date, the Forecast base date or
        Observations date.

    resol : str, optional
        Specifies the desired triangular truncation of retrieved data,
        before carrying out any other selected post-processing.

    stream : str, optional
        Identifies the forecasting system used to generate the data.

    area : str, optional
        Specifies the desired sub-area of data to be extracted.

    time : str, optional
        Specifies the time of the data in hours and minutes.

    step : str, optional
        Specifies the forecast time step from forecast base time.

    expver : str, optional
        The version of the dataset.

    number : str, optional
        Selects the member in ensemble forecast run.

    accuracy : str, optional
        Specifies the number of bits per value to be used in the
        generated GRIB coded fields.

    grid : str, optional
        Specifies the output grid which can be either a Gaussian grid
        or a Latitude/Longitude grid.

    gaussian : str, optional
        This parameter is deprecated and should no longer be used.
        Specifies the desired type of Gaussian grid for the output.

    target : str, optional
        Specifies a file into which data is to be written after
        retrieval or manipulation.

    param : str, optional
        Specifies the meteorological parameter.
    '''

    def __init__(self, server, public, marsclass="EA", dataset="", type="",
                 levtype="", levelist="", repres="", date="", resol="",
                 stream="", area="", time="", step="", expver="1",
                 number="", accuracy="", grid="", gaussian="", target="",
                 param=""):
        '''Initialises the instance of the MarsRetrieval class and
        defines and assigns a set of the necessary retrieval parameters
        for the FLEXPART input data.
        A description of MARS keywords/arguments, their dependencies
        on each other and examples of their values can be found here:

        https://software.ecmwf.int/wiki/display/UDOC/MARS+keywords

        Parameters
        ----------
        server : ECMWFService or ECMWFDataServer
            This is the connection to the ECMWF data servers.
            It is needed for the pythonic access of ECMWF data.
            A false value selects the retrieval through the "mars"
            command line client instead.

        public : int
            Decides which Web API version is used:
            0: member-state users and full archive access
            1: public access and limited access to the public server and
               datasets. Needs the parameter dataset.
            This parameter has no default and must be passed explicitly.

        marsclass : str, optional
            Characterisation of dataset. E.g. EI (ERA-Interim),
            E4 (ERA40), OD (Operational archive), EA (ERA5).
            Default is the ERA5 dataset "EA".

        dataset : str, optional
            For public datasets there is the specific naming and parameter
            dataset which has to be used to characterize the type of
            data. Usually there is less data available, either in times,
            domain or parameter.
            Default is an empty string.

        type : str, optional
            Determines the type of fields to be retrieved.
            Selects between observations, images or fields.
            Examples for fields: Analysis (an), Forecast (fc),
            Perturbed Forecast (pf), Control Forecast (cf) and so on.
            Default is an empty string.

        levtype : str, optional
            Denotes type of level. Has a direct implication on valid
            levelist values!
            E.g. model level (ml), pressure level (pl), surface (sfc),
            potential vorticity (pv), potential temperature (pt)
            and depth (dp).
            Default is an empty string.

        levelist : str, optional
            Specifies the required levels. It has to have a valid
            correspondence to the selected levtype.
            Examples: model level: 1/to/137, pressure levels: 500/to/1000
            Default is an empty string.

        repres : str, optional
            Selects the representation of the archived data.
            E.g. sh - spherical harmonics, gg - Gaussian grid,
            ll - latitude/longitude, ...
            Default is an empty string.

        date : str, optional
            Specifies the Analysis date, the Forecast base date or
            Observations date. Valid formats are:
            Absolute as YYYY-MM-DD or YYYYMMDD.
            Default is an empty string.

        resol : str, optional
            Specifies the desired triangular truncation of retrieved data,
            before carrying out any other selected post-processing.
            The default is automatic truncation (auto), by which the lowest
            resolution compatible with the value specified in grid is
            automatically selected for the retrieval.
            Users wanting to perform post-processing from full spectral
            resolution should specify Archived Value (av).
            The following are examples of existing resolutions found in
            the archive: 63, 106, 159, 213, 255, 319, 399, 511, 799 or 1279.
            This keyword has no meaning/effect if the archived data is
            not in spherical harmonics representation.
            The best selection can be found here:
            https://software.ecmwf.int/wiki/display/UDOC/\
                  Retrieve#Retrieve-Truncationbeforeinterpolation
            Default is an empty string.

        stream : str, optional
            Identifies the forecasting system used to generate the data.
            E.g. oper (Atmospheric model), enfo (Ensemble forecasts), ...
            Default is an empty string.

        area : str, optional
            Specifies the desired sub-area of data to be extracted.
            Areas can be defined to wrap around the globe.

            Latitude values must be given as signed numbers, with:
                north latitudes (i.e. north of the equator)
                    being positive (e.g: 40.5)
                south latitudes (i.e. south of the equator)
                    being negative (e.g: -50.5)
            Longitude values must be given as signed numbers, with:
                east longitudes (i.e. east of the 0 degree meridian)
                    being positive (e.g: 35.0)
                west longitudes (i.e. west of the 0 degree meridian)
                    being negative (e.g: -20.5)

            E.g.: North/West/South/East
            Default is an empty string.

        time : str, optional
            Specifies the time of the data in hours and minutes.
            Valid values depend on the type of data: Analysis time,
            Forecast base time or First guess verification time
            (all usually at synoptic hours: 00, 06, 12 and 18 ).
            Observation time (any combination in hours and minutes is valid,
            subject to data availability in the archive).
            The syntax is HHMM or HH:MM. If MM is omitted it defaults to 00.
            Default is an empty string.

        step : str, optional
            Specifies the forecast time step from forecast base time.
            Valid values are hours (HH) from forecast base time. It also
            specifies the length of the forecast which verifies at
            First Guess time.
            E.g. 1/3/6-hourly
            Default is an empty string.

        expver : str, optional
            The version of the dataset. Each experiment is assigned a
            unique code (version). Production data is assigned 1 or 2,
            and experimental data in Operations 11, 12 ,...
            Research or Member State's experiments have a four letter
            experiment identifier.
            Default is "1".

        number : str, optional
            Selects the member in ensemble forecast run. (Only then it
            is necessary.) It has a different meaning depending on
            the type of data.
            E.g. Perturbed Forecasts: specifies the Ensemble forecast member
            Default is an empty string.

        accuracy : str, optional
            Specifies the number of bits per value to be used in the
            generated GRIB coded fields.
            A positive integer may be given to specify the preferred number
            of bits per packed value. This must not be greater than the
            number of bits normally used for a Fortran integer on the
            processor handling the request (typically 32 or 64 bit).
            Within a compute request the accuracy of the original fields
            can be passed to the result field by specifying accuracy=av.
            Default is an empty string.

        grid : str, optional
            Specifies the output grid which can be either a Gaussian grid
            or a Latitude/Longitude grid. MARS requests specifying
            grid=av will return the archived model grid.

            Lat/Lon grid: The grid spacing needs to be an integer
            fraction of 90 degrees e.g. grid = 0.5/0.5

            Gaussian grid: specified by a letter denoting the type of
            Gaussian grid followed by an integer (the grid number)
            representing the number of lines between the Pole and Equator,
            e.g.
            grid = F160 - full (or regular) Gaussian grid with
                   160 latitude lines between the pole and equator
            grid = N320 - ECMWF original reduced Gaussian grid with
                   320 latitude lines between the pole and equator,
                   see Reduced Gaussian Grids for grid numbers used at ECMWF
            grid = O640 - ECMWF octahedral (reduced) Gaussian grid with
                   640 latitude lines between the pole and equator
            Default is an empty string.

        gaussian : str, optional
            This parameter is deprecated and should no longer be used.
            Specifies the desired type of Gaussian grid for the output.
            Valid Gaussian grids are quasi-regular (reduced) or regular.
            Keyword gaussian can only be specified together with
            keyword grid. Gaussian without grid has no effect.
            Default is an empty string.

        target : str, optional
            Specifies a file into which data is to be written after
            retrieval or manipulation. Path names should always be
            enclosed in double quotes. The MARS client supports automatic
            generation of multiple target files using MARS keywords
            enclosed in square brackets [ ].  If the environment variable
            MARS_MULTITARGET_STRICT_FORMAT is set to 1 before calling mars,
            the keyword values will be used in the filename as shown by
            the ecCodes GRIB tool grib_ls -m, e.g. with
            MARS_MULTITARGET_STRICT_FORMAT set to 1 the keywords time,
            expver and param will be formatted as 0600, 0001 and 129.128
            rather than 600, 1 and 129.
            Default is an empty string.

        param : str, optional
            Specifies the meteorological parameter.
            The list of meteorological parameters in MARS is extensive.
            Their availability is directly related to their meteorological
            meaning and, therefore, the rest of directives specified
            in the MARS request.
            Meteorological parameters can be specified by their
            GRIB code (param=130), their mnemonic (param=t) or
            full name (param=temperature).
            The list of parameter should be separated by a "/"-sign.
            E.g. 130/131/133
            Default is an empty string.

        Return
        ------
        None

        '''

        # NOTE: every attribute stored here is treated as part of the request
        # by the other methods (they work on vars(self)); only "server" and
        # "public" are filtered out again.
        self.server = server
        self.public = public
        self.marsclass = marsclass
        self.dataset = dataset
        self.type = type
        self.levtype = levtype
        self.levelist = levelist
        self.repres = repres
        self.date = date
        self.resol = resol
        self.stream = stream
        self.area = area
        self.time = time
        self.step = step
        self.expver = expver
        self.number = number
        self.accuracy = accuracy
        self.grid = grid
        self.gaussian = gaussian
        self.target = target
        self.param = param

    def display_info(self):
        '''Prints all class attributes and their values to the
        standard output.

        The attributes "server" and "public" are not printed.

        Parameters
        ----------

        Return
        ------
        None

        '''
        for name, value in vars(self).items():
            if name in ('server', 'public'):
                continue
            print(name + ': ' + str(value))

    def print_infodata_csv(self, inputdir, request_number):
        '''Write all request parameter in alphabetical order into a "csv" file.

        One line is appended per call: the request number followed by the
        values of all attributes (without "server" and "public"), sorted
        by attribute name and separated by ", ".

        Parameters
        ----------
        inputdir : str
            The path where all data from the retrievals are stored.

        request_number : int
            Number of mars requests for flux and non-flux data.

        Return
        ------
        None

        '''

        # Get all class attributes and their values as a dictionary
        attrs = vars(self).copy()
        del attrs['server']
        del attrs['public']

        values = ', '.join(str(attrs[key]) for key in sorted(attrs))

        # append the request to the file collecting all requests
        with open(os.path.join(inputdir,
                               _config.FILE_MARS_REQUESTS), 'a') as f:
            f.write(str(request_number) + ', ' + values + '\n')

    def _convert_to_cdsera5_sfc_request(self, attrs):
        '''
        The keywords and values for the single level download
        with CDS API is different from MARS. This function
        converts the old request keywords to the new ones.

        Example request for single level downloads in CDS API

        retrieve(
            'reanalysis-era5-single-levels',
            {
                'product_type': 'reanalysis',
                'variable': 'total_precipitation',
                'year': '2019',
                'month': '01',
                'day': '01',
                'time': '00:00',
                'format': 'grib',
                'grid':[1.0, 1.0],
                'area': [
                    45, 0, 43,
                    12,
                ],
            },
            'download.grib')

        Parameters
        ----------
        attrs : dict
            Dictionary of the mars request parameters. The keys "date",
            "area", "grid" and "param" are required. "time" is required
            unless "step" is a range. "step" and "type" are optional,
            because data_retrieve removes all keys with empty values
            before calling this function.

        Return
        ------
        newattrs : dict
            Dictionary of the request parameters for the CDS API with the
            keys "year", "month", "day", "time", "product_type", "area",
            "grid", "param" and "format".

        Raises
        ------
        ValueError
            If a date or step range cannot be parsed, or if a forecast
            ("FC") request does not come with a step range.

        '''
        from datetime import datetime, timedelta

        newattrs = {}

        # --- date: either a range "start/to/end" or a single day ----------
        if '/' in attrs['date']:
            # every second element skips the "to" keyword of the range
            start, end = attrs['date'].split('/')[::2]
            current = datetime.strptime(start, '%Y%m%d')
            last = datetime.strptime(end, '%Y%m%d')
            years, months, days = set(), set(), set()
            while current <= last:
                years.add(current.year)
                months.add(current.month)
                days.add(current.day)
                current += timedelta(days=1)
            # sorted, so the request does not depend on set ordering
            newattrs['year'] = sorted(years)
            newattrs['month'] = sorted(months)
            newattrs['day'] = sorted(days)
        else:
            single_day = datetime.strptime(attrs['date'], '%Y%m%d')
            newattrs['year'] = single_day.year
            newattrs['month'] = single_day.month
            newattrs['day'] = single_day.day

        # --- time ----------------------------------------------------------
        # "step" may be missing, since empty values are stripped by the caller
        step_spec = attrs.get('step', '')
        if '/' in step_spec:
            # A step range "start/to/end/by/interval" is replaced by the
            # times of day in "interval" hour spacing; this is also how the
            # forecast (flux) fields get their times.
            step_parts = step_spec.split('/')
            if len(step_parts) < 5:
                raise ValueError('Expected step as "start/to/end/by/interval"'
                                 ' but got "' + step_spec + '"')
            interval = int(step_parts[4])
            newattrs['time'] = [str(hour) for hour in range(0, 24, interval)]
        elif attrs.get('type') == 'FC':
            # for EA5 only flux fields are retrieved as FC type; their times
            # can only be derived from a step range
            raise ValueError('Forecast (FC) requests need a step range as '
                             '"start/to/end/by/interval"')
        elif '160.128' in attrs['param']:
            # requests containing parameter 160.128 pass the time on unchanged
            newattrs['time'] = attrs['time']
        elif '/' in attrs['time']:
            # a list of times separated by "/"
            newattrs['time'] = attrs['time'].split('/')
        else:
            # a single time
            newattrs['time'] = [attrs['time']]

        # --- remaining keywords --------------------------------------------
        newattrs['product_type'] = 'reanalysis'
        newattrs['area'] = attrs['area'].split('/')
        newattrs['grid'] = [float(res) for res in attrs['grid'].split('/')]
        newattrs['param'] = attrs['param'].split('/')
        newattrs['format'] = 'grib'

        return newattrs

    def data_retrieve(self):
        '''Submits a MARS retrieval. Depending on the existence of
        ECMWF Web-API or CDS API it is submitted via Python or a
        subprocess in the Shell. The parameter for the mars retrieval
        are taken from the defined class attributes.

        If a server instance is set, a failing request is reported on the
        standard output and the program is terminated with exit status 1.
        Without a server the "mars" command line client is called and
        failures are raised as IOError.

        Parameters
        ----------

        Return
        ------
        None

        Raises
        ------
        IOError
            If the "mars" client reports errors or the target file is
            missing or empty after the retrieval.

        '''
        # Get all class attributes and their values as a dictionary
        attrs = vars(self).copy()

        # eliminate unnecessary attributes from the dictionary attrs
        del attrs['server']
        del attrs['public']

        # exchange parameter name for marsclass
        attrs['class'] = attrs.pop('marsclass')

        # prepare target variable as needed for the Web API or CDS API mode
        # within the dictionary for full access
        # as a single variable for public access
        target = attrs.get('target')
        if not int(self.public):
            del attrs['target']
        print('target: ' + target)

        # drop all keys without a value and convert all other values
        # to strings
        attrs = {key: str(value) for key, value in attrs.items()
                 if value != ''}

        # MARS request via Python script
        if self.server:
            try:
                if cds_api and isinstance(self.server, cdsapi.Client):
                    # distinguish between model (ECMWF MARS access)
                    # and surface level (CS3 online access)
                    if attrs['levtype'].lower() == 'ml':
                        dataset = _config.CDS_DATASET_ML
                    else:
                        dataset = _config.CDS_DATASET_SFC
                        attrs = self._convert_to_cdsera5_sfc_request(attrs)
                    print('RETRIEVE ERA5 WITH CDS API!')
                    self.server.retrieve(dataset,
                                         attrs, target)
                elif ec_api and isinstance(self.server,
                                           ecmwfapi.ECMWFDataServer):
                    print('RETRIEVE PUBLIC DATA (NOT ERA5)!')
                    self.server.retrieve(attrs)
                elif ec_api and isinstance(self.server,
                                           ecmwfapi.ECMWFService):
                    print('EXECUTE NON-PUBLIC RETRIEVAL (NOT ERA5)!')
                    self.server.execute(attrs, target)
                else:
                    print('ERROR:')
                    print('No match for Web API instance!')
                    raise IOError('No match for Web API instance!')
            except Exception as e:
                print('\n\nMARS Request failed!')
                print(e)
                print(traceback.format_exc())
                # a non-zero status, so that calling scripts can detect
                # the failed retrieval
                sys.exit(1)

        # MARS request via call in shell
        else:
            # "target" is always appended quoted at the end, hence it is
            # skipped here in case it is still part of the dictionary
            request_str = 'ret' + ''.join(',' + key + '=' + str(value)
                                          for key, value in attrs.items()
                                          if key != 'target')
            request_str += ',target="' + target + '"'

            # no "bufsize=1": line buffering is only available for pipes
            # in text mode, while the request is exchanged as bytes here
            p = subprocess.Popen(['mars'], #'-e'],
                                 stdin=subprocess.PIPE,
                                 stdout=subprocess.PIPE,
                                 stderr=subprocess.PIPE)
            pout, perr = p.communicate(input=request_str.encode())
            output = pout.decode('utf-8', 'replace')
            print(output)
            if perr:
                # pass on the error messages of the mars client
                sys.stderr.write(perr.decode('utf-8', 'replace'))

            if 'Some errors reported' in output:
                print('MARS Request failed - please check request')
                raise IOError('MARS Request failed - please check request')
            if not os.path.isfile(target) or os.stat(target).st_size == 0:
                print('MARS Request returned no data - please check request')
                raise IOError('MARS Request returned no data - '
                              'please check request')
Note: See TracBrowser for help on using the repository browser.
hosted by ZAMG