From 6fbc98328da9870eab2c0b3232e8dd0bebf20a8d Mon Sep 17 00:00:00 2001 From: "zoiszogop@gmail.com" Date: Wed, 9 Oct 2024 15:04:19 +0200 Subject: [PATCH] stilt - partly lint stiltobj.py and work on get_ts() --- icoscp/docs/changelog.md | 4 + icoscp_stilt/mkdocs.yml | 1 + icoscp_stilt/src/icoscp_stilt/stiltobj.py | 340 +++++++++++----------- 3 files changed, 172 insertions(+), 173 deletions(-) diff --git a/icoscp/docs/changelog.md b/icoscp/docs/changelog.md index c66bec2..8af24f8 100644 --- a/icoscp/docs/changelog.md +++ b/icoscp/docs/changelog.md @@ -1,5 +1,9 @@ # Changelog +## 0.2.1 +- #### dependencies + - Version pin library's dependencies. + ## 0.2.0 - #### cpauth module - Remove legacy authentication. diff --git a/icoscp_stilt/mkdocs.yml b/icoscp_stilt/mkdocs.yml index 9ebd704..3f4f357 100644 --- a/icoscp_stilt/mkdocs.yml +++ b/icoscp_stilt/mkdocs.yml @@ -5,6 +5,7 @@ nav: - About: index.md - Getting started with the new icoscp_stilt: getting_started.md - Legacy modules: modules.md + - Changelog: changelog.md theme: name: readthedocs diff --git a/icoscp_stilt/src/icoscp_stilt/stiltobj.py b/icoscp_stilt/src/icoscp_stilt/stiltobj.py index 8b92653..f874585 100644 --- a/icoscp_stilt/src/icoscp_stilt/stiltobj.py +++ b/icoscp_stilt/src/icoscp_stilt/stiltobj.py @@ -4,21 +4,27 @@ Description: Class that creates objects to set and get the attributes of a station for which STILT model output is available for. """ +# Standard library imports. +from pathlib import Path +from typing import Any, List, Optional +import json import os + +# Related third party imports. +from icoscp_core.icos import meta +from icoscp_core.queries.dataobjlist import SamplingHeightFilter import numpy as np import pandas as pd import requests -import json import xarray as xr -from . import const as CPC -from typing import Any -from . import timefuncs as tf -from icoscp_core.queries.dataobjlist import SamplingHeightFilter -from icoscp_core.icos import meta + +# Local application/library specific imports. from . import __version__ as release_version +from . import const as c +from . import timefuncs as tf -class StiltStation(): +class StiltStation(): """ Attributes: id: STILT station ID (e.g. 'HTM150') locIdent: String with latitude-longitude-altitude @@ -39,12 +45,12 @@ class StiltStation(): def __init__(self, st_dict: dict[str, Any]): # Object attributes: - self._path_fp = CPC.STILTFP # Path to location where STILT footprints are stored - self._path_ts = CPC.STILTPATH # Path to location where STILT time series are stored - self._url = CPC.STILTTS # URL to STILT information - self.info = st_dict # store the initial dict - self.valid = False # if input is dictionary, this will be True - self.id = None # STILT ID for station (e.g. 'HTM150') + self._path_fp = c.STILTFP # Path to location where STILT footprints are stored + self._path_ts = c.STILTPATH # Path to location where STILT time series are stored + self._url = c.STILTTS # URL to STILT information + self.info = st_dict # store the initial dict + self.valid = False # if input is dictionary, this will be True + self.id = None # STILT ID for station (e.g. 'HTM150') self.lat = None self.lon = None self.alt = None @@ -53,16 +59,18 @@ def __init__(self, st_dict: dict[str, Any]): self.icos: dict[str, Any] | None = None self.years = None self.geoinfo = None - self.dobjs_list = None # Store a list of associated dobjs - self.dobjs_valid = False # If True, dobjs sparql query already executed + self.dobjs_list = None # Store a list of associated dobjs + self.dobjs_valid = False # If True, dobjs sparql query already executed self._set(st_dict) - #------------------------------------------------------------------------ + # ------------------------------------------------------------------------ def _set(self, st_dict: dict[str, Any]): # - if all(item in st_dict.keys() for item in ['id', 'lat', 'lon', 'alt', 'locIdent', 'name', 'icos', 'years', 'geoinfo']): + if all(item in st_dict.keys() for item in + ['id', 'lat', 'lon', 'alt', 'locIdent', 'name', 'icos', 'years', + 'geoinfo']): self.id = st_dict['id'] self.lat = st_dict['lat'] self.lon = st_dict['lon'] @@ -74,9 +82,8 @@ def _set(self, st_dict: dict[str, Any]): self.geoinfo = st_dict['geoinfo'] self.valid = True - def __str__(self): - # by default called when a an 'object' is printed + # By default, called when an 'object' is printed out = {'id:': self.id, 'name:': self.name, @@ -86,8 +93,8 @@ def __str__(self): } return json.dumps(out) - #---------------------------------------------------------------------------------------------------------- - def get_ts(self, start_date, end_date, hours=None, columns=None): + def get_ts(self, start_date, end_date, hours=None, + columns: Optional[str] = None): """ STILT concentration time series for a given time period, with optional selection of specific hours and columns. @@ -100,10 +107,19 @@ def get_ts(self, start_date, end_date, hours=None, columns=None): end_date : STR, FLOAT/INT (Unix timestamp), datetime object Example: end_date = '2018-01-31' hours : STR | INT, optional - If hours is empty or None, ALL Timeslots are returned. - [0, 3, 6, 9, 12 ,15, 18, 21] + If hours argument is empty or None, ALL Timeslots are + returned: [0, 3, 6, 9, 12 ,15, 18, 21] + columns : Optional[str] + Valid entries are: 'default', 'co2', 'ch4', 'co', 'rn', + 'wind', 'latlon', 'all'. + Using 'default', empty, or None will return: + ['isodate', 'co2.stilt', 'co2.bio', 'co2.fuel', + 'co2.cement', 'co2.background'] + A full description of the 'columns' can be found at + https://icos-carbon-portal.github.io/pylib/icoscp_stilt/modules/#get_tsstart_date-end_date-hoursnone-columns - Valid results are returned as result with LOWER BOUND values. + + Valid results are returned as a result with LOWER-BOUND values. For backwards compatibility, input for str format hh:mm is accepted. @@ -114,15 +130,6 @@ def get_ts(self, start_date, end_date, hours=None, columns=None): hours = [] return ALL hours = ['10', '10:00', 10] returns timeslot 9 - columns : TYPE, optional - Valid entries are: - 'default', 'co2', 'ch4', 'co', 'rn', 'wind', 'latlon', 'all' - Using 'default', empty, or None will return: - ['isodate', 'co2.stilt', 'co2.bio', 'co2.fuel', - 'co2.cement', 'co2.background'] - A full description of the 'columns' can be found at - https://icos-carbon-portal.github.io/pylib/modules/#stilt - Returns ------- Pandas Dataframe @@ -138,69 +145,75 @@ def get_ts(self, start_date, end_date, hours=None, columns=None): if not hours: return False # Create an empty dataframe to store the timeseries: - df=pd.DataFrame({'A' : []}) - # Add headers: - headers = {'Content-Type': 'application/json', 'Accept-Charset': 'UTF-8'} + df = pd.DataFrame({'A': []}) # Create an empty list, to store the new time range with # available STILT model results: - new_range=[] + new_range = [] # Create a pandas dataframe containing one column of datetime # objects with 3-hour intervals: - # date_range = pd.date_range(start_date, end_date+dt.timedelta(hours=24), freq='3H') date_range = pd.date_range(s_date, e_date, freq='3H') - # Loop through every Datetime object in the dataframe: + # Loop through every Datetime object in the dataframe and + # generate a list of Datetime objects for the STILT results + # that exist. for zDate in date_range: - # Check if STILT results exist: - if os.path.exists(self._path_fp + self.locIdent + '/' + - str(zDate.year)+'/'+str(zDate.month).zfill(2)+'/'+ - str(zDate.year)+'x'+str(zDate.month).zfill(2)+'x'+str(zDate.day).zfill(2)+'x'+ - str(zDate.hour).zfill(2)+'/'): - - # If STILT-results exist for the current Datetime - # object, append current Datetime object to list: + # Path may look like this: + # /data/stiltweb/slots/51.41Nx006.88Ex00200/2021/01/2021x01x01x00 + if Path( + self._path_fp + self.locIdent, + str(zDate.year), + str(zDate.month).zfill(2), + str(zDate.year) + 'x' + str(zDate.month).zfill(2) + 'x' + + str(zDate.day).zfill(2) + 'x' + str(zDate.hour).zfill(2) + ).exists(): new_range.append(zDate) - # If the list is not empty: if len(new_range) > 0: - # Assign the new time range to date_range: date_range = new_range - # Get new starting date: - fromDate = date_range[0].strftime('%Y-%m-%d') - # Get new ending date: - toDate = date_range[-1].strftime('%Y-%m-%d') - # Store the STILT result column names to a variable: - columns = self.__columns(columns) - # Store the STILT result data column names to a variable: - data = '{"columns": '+columns+', "fromDate": "'+fromDate+'", "toDate": "'+toDate+'", "stationId": "'+self.id+'"}' - # Send request to get STILT results: - response = requests.post(self._url, headers=headers, data=data) - # Check if response is successful: - if response.status_code != 500: - # Get response in json-format and read it in to a numpy array: - output=np.asarray(response.json()) + from_date = date_range[0].strftime('%Y-%m-%d') + to_date = date_range[-1].strftime('%Y-%m-%d') + columns = self.__columns(columns) + http_resp = requests.post( + url=self._url, + json={ + 'stationId': self.id, + 'fromDate': from_date, + 'toDate': to_date, + 'columns': columns + }, + timeout=c.HTTP_TIMEOUT_SEC + ) + if http_resp.status_code == 200: + output = np.asarray(http_resp.json()) # Convert numpy array with STILT results to a pandas dataframe - cols = columns[1:-1].replace('"','').replace(' ','') - cols = list(cols.split(',')) - df = pd.DataFrame(output[:,:], columns=cols) - # Replace 'null'-values with numpy NaN-values: - df = df.replace('null',np.NaN) - # Set dataframe data type to float: + df = pd.DataFrame(output[:, :], columns=columns) + df = df.replace('null', np.NaN) df = df.astype(float) - # Convert the data type of the 'date'-column to Datetime Object: + # Convert 'date' column to a Datetime Object type. df['date'] = pd.to_datetime(df['isodate'], unit='s') # Set 'date'-column as index: - df.set_index(['date'],inplace=True) + df.set_index(['date'], inplace=True) # Filter dataframe values by timeslots: hours = [str(h).zfill(2) for h in hours] df = df.loc[df.index.strftime('%H').isin(hours)] else: - # Print message: - print("\033[0;31;1m Error...\nToo big STILT dataset!\nSelect data for a shorter time period.\n\n") - - # track data usage + msg = (f'\033[0;31;1m' + f'There was an error during the http request. To ' + f'reproduce it, run:\n' + f'import requests\n\n' + f'http_resp = requests.post(\n' + f"\turl='{self._url}',\n" + f'\tjson={{\n' + f"\t\t'stationId': '{self.id}',\n" + f"\t\t'fromDate': '{from_date}',\n" + f"\t\t'toDate': '{to_date}',\n" + f"\t\t'columns': {columns}\n" + f'\t}},\n' + f'\ttimeout={c.HTTP_TIMEOUT_SEC}\n' + f')\n' + f'print(http_resp.content)') + print(msg) + # Track data usage self.__portalUse('timeseries') - # Return dataframe: return df - #---------------------------------------------------------------------------------------------------------- def get_fp(self, start_date, end_date, hours=None): """ @@ -290,8 +303,8 @@ def get_fp(self, start_date, end_date, hours=None): def get_raw(self, start_date, end_date, cols): """ - Please do use this function with caution. Only very expirienced user - should load raw data. + Please do use this function with caution. Only very experienced + user should load raw data. Parameters ---------- @@ -301,7 +314,7 @@ def get_raw(self, start_date, end_date, cols): End date in form yy-mm-dd. cols : LIST[STR] A list of valid column names. You can retrieve the full list - with _raw_clumn_names. + with _raw_column_names. Returns ------- @@ -329,32 +342,33 @@ def get_raw(self, start_date, end_date, cols): # create http header and payload: - headers = {'Content-Type': 'application/json', 'Accept-Charset': 'UTF-8'} - data = '{"columns": '+ str(columns) + ',"fromDate": "'+s_date+'", "toDate": "'+e_date+'", "stationId": "'+self.id+'"}' - response = requests.post(CPC.STILTRAW, headers=headers, data=data) + headers = {'Content-Type': 'application/json', + 'Accept-Charset': 'UTF-8'} + data = '{"columns": ' + str( + columns) + ',"fromDate": "' + s_date + '", "toDate": "' + e_date + '", "stationId": "' + self.id + '"}' + response = requests.post(c.STILTRAW, headers=headers, data=data) if response.status_code != 500: - # Get response in json-format and read it in to a numpy array: - output=np.asarray(response.json()) + output = np.asarray(response.json()) # Convert numpy array with STILT results to a pandas dataframe - cols = columns[1:-1].replace('"','').replace(' ','') + cols = columns[1:-1].replace('"', '').replace(' ', '') cols = list(cols.split(',')) - df = pd.DataFrame(output[:,:], columns=cols) + df = pd.DataFrame(output[:, :], columns=cols) # Replace 'null'-values with numpy NaN-values: - df = df.replace('null',np.NaN) + df = df.replace('null', np.NaN) # Set dataframe data type to float: df = df.astype(float) # Convert the data type of the 'date'-column to Datetime Object: - df['date'] = pd.to_datetime(df['isodate'], unit='s') - - # Set 'date'-column as index: - df.set_index(['date'],inplace=True) + if 'isodate' in df.columns: + df['isodate'] = pd.to_datetime(df['isodate'], unit='s') + # Set 'date'-column as index: + df.set_index(['date'], inplace=True) # track data usage self.__portalUse('timeseries') @@ -390,96 +404,76 @@ def get_dobj_list(self): 'timeStart': dobj.time_start, 'timeEnd': dobj.time_end } for dobj in meta.list_data_objects( - datatype=[CPC.OBS_SPEC_CO2, CPC.OBS_SPEC_CH4], - station=CPC.ICOS_STATION_PREFIX + self.icos['stationId'], + datatype=[c.CP_OBSPACK_CO2_SPEC, c.CP_OBSPACK_CH4_SPEC], + station=c.ICOS_STATION_PREFIX + self.icos['stationId'], filters=[SamplingHeightFilter("=", float(self.icos['SamplingHeight']))] )] self.dobjs_valid = True return self.dobjs_list - - def __columns(self, cols): + def __columns(self, columns: Optional[str] = None) -> Optional[List]: # Function that checks the selection of columns that are to be # returned with the STILT timeseries model output: - if cols: - # Convert user-specified columns to lower case. - cols = cols.lower() - - # check for a valid entry. If not...return default - valid = ["default", "co2", "ch4", "co", "rn", "wind", "latlon", "all"] - if cols not in valid: - cols = 'default' + columns = columns.lower() if columns else None + valid = ['default', 'co2', 'ch4', 'co', 'rn', 'wind', 'latlon', 'all'] + columns = 'default' if columns not in valid else columns # Check columns-input: - if cols=='default': - columns = ('["isodate","co2.stilt","co2.bio","co2.fuel",'+ - '"co2.cement","co2.non_fuel",'+ - '"co2.background"]') - - elif cols=='co2': - columns = ('["isodate","co2.stilt","co2.bio","co2.fuel",'+ - '"co2.cement","co2.non_fuel",'+ - '"co2.bio.gee","co2.bio.resp",' + - '"co2.fuel.coal","co2.fuel.oil","co2.fuel.gas",'+ - '"co2.fuel.bio","co2.fuel.waste",'+ - '"co2.energy","co2.transport","co2.industry",'+ - '"co2.residential","co2.other_categories",'+ - '"co2.background"]') - - elif cols=='co': - columns = ('["isodate", "co.stilt","co.fuel",'+ - '"co.cement","co.non_fuel",'+ - '"co.fuel.coal","co.fuel.oil","co.fuel.gas",'+ - '"co.fuel.bio","co.fuel.waste",'+ - '"co.energy","co.transport","co.industry",'+ - '"co.residential","co.other_categories",'+ - '"co.background"]') - - elif cols=='ch4': - columns = ('["isodate", "ch4.stilt",'+ - '"ch4.anthropogenic","ch4.natural",'+ - '"ch4.agriculture","ch4.waste",'+ - '"ch4.energy","ch4.other_categories",'+ - '"ch4.wetlands","ch4.soil_uptake",'+ - '"ch4.wildfire","ch4.other_natural",'+ - '"ch4.background"]') - - elif cols=='rn': - columns = ('["isodate", "rn", "rn.era", "rn.noah"]') - - elif cols=='wind': - columns = ('["isodate", "wind.dir", "wind.u", "wind.v"]') - - elif cols=='latlon': - columns = ('["isodate", "latstart", "lonstart"]') - - elif cols=='all': - columns = ('["isodate","co2.stilt","co2.bio","co2.fuel",'+ - '"co2.cement","co2.non_fuel",'+ - '"co2.bio.gee", "co2.bio.resp",' + - '"co2.fuel.coal","co2.fuel.oil","co2.fuel.gas",'+ - '"co2.fuel.bio","co2.fuel.waste",'+ - '"co2.energy","co2.transport", "co2.industry",'+ - '"co2.residential","co2.other_categories",'+ - '"co2.background",'+ - '"co.stilt","co.fuel","co.cement","co.non_fuel",'+ - '"isodate", "ch4.stilt",'+ - '"ch4.anthropogenic","ch4.natural",'+ - '"ch4.agriculture","ch4.waste",'+ - '"ch4.energy","ch4.other_categories",'+ - '"ch4.wetlands","ch4.soil_uptake",'+ - '"ch4.wildfire","ch4.other_natural",'+ - '"ch4.background",'+ - '"co.fuel.coal","co.fuel.oil","co.fuel.gas",'+ - '"co.fuel.bio","co.fuel.waste",'+ - '"co.energy","co.transport","co.industry",'+ - '"co.residential","co.other_categories",'+ - '"co.background",'+ - '"rn", "rn.era","rn.noah",'+ - '"wind.dir","wind.u","wind.v",'+ - '"latstart","lonstart"]') - - # Return variable: + if columns == 'default': + columns = ['isodate', 'co2.stilt', 'co2.bio', 'co2.fuel', + 'co2.cement', 'co2.non_fuel', 'co2.background'] + + elif columns == 'co2': + columns = ['isodate', 'co2.stilt', 'co2.bio', 'co2.fuel', + 'co2.cement', 'co2.non_fuel', 'co2.bio.gee', + 'co2.bio.resp', 'co2.fuel.coal', 'co2.fuel.oil', + 'co2.fuel.gas', 'co2.fuel.bio', 'co2.fuel.waste', + 'co2.energy', 'co2.transport', 'co2.industry', + 'co2.residential', 'co2.other_categories', + 'co2.background'] + + elif columns == 'co': + columns = ['isodate', 'co.stilt', 'co.fuel', 'co.cement', + 'co.non_fuel', 'co.fuel.coal', 'co.fuel.oil', + 'co.fuel.gas', 'co.fuel.bio', 'co.fuel.waste', + 'co.energy', 'co.transport', 'co.industry', + 'co.residential', 'co.other_categories', + 'co.background'] + + elif columns == 'ch4': + columns = ['isodate', 'ch4.stilt', 'ch4.anthropogenic', + 'ch4.natural', 'ch4.agriculture', 'ch4.waste', + 'ch4.energy', 'ch4.other_categories', 'ch4.wetlands', + 'ch4.soil_uptake', 'ch4.wildfire', 'ch4.other_natural', + 'ch4.background'] + + elif columns == 'rn': + columns = ['isodate', 'rn', 'rn.era', 'rn.noah'] + + elif columns == 'wind': + columns = ['isodate', 'wind.dir', 'wind.u', 'wind.v'] + + elif columns == 'latlon': + columns = ['isodate', 'latstart', 'lonstart'] + + elif columns == 'all': + columns = ['isodate', 'co2.stilt', 'co2.bio', 'co2.fuel', + 'co2.cement', 'co2.non_fuel', 'co2.bio.gee', + 'co2.bio.resp', 'co2.fuel.coal', 'co2.fuel.oil', + 'co2.fuel.gas', 'co2.fuel.bio', 'co2.fuel.waste', + 'co2.energy', 'co2.transport', 'co2.industry', + 'co2.residential', 'co2.other_categories', + 'co2.background', 'co.stilt', 'co.fuel', 'co.cement', + 'co.non_fuel', 'isodate', 'ch4.stilt', + 'ch4.anthropogenic', 'ch4.natural', 'ch4.agriculture', + 'ch4.waste', 'ch4.energy', 'ch4.other_categories', + 'ch4.wetlands', 'ch4.soil_uptake', 'ch4.wildfire', + 'ch4.other_natural', 'ch4.background', 'co.fuel.coal', + 'co.fuel.oil', 'co.fuel.gas', 'co.fuel.bio', + 'co.fuel.waste', 'co.energy', 'co.transport', + 'co.industry', 'co.residential', 'co.other_categories', + 'co.background', 'rn', 'rn.era', 'rn.noah', 'wind.dir', + 'wind.u', 'wind.v', 'latstart', 'lonstart'] return columns def __portalUse(self, dtype):