From 6fbc98328da9870eab2c0b3232e8dd0bebf20a8d Mon Sep 17 00:00:00 2001
From: "zoiszogop@gmail.com" <zoiszogop@gmail.com>
Date: Wed, 9 Oct 2024 15:04:19 +0200
Subject: [PATCH] stilt - partly lint stiltobj.py and work on get_ts()

---
 icoscp/docs/changelog.md                  |   4 +
 icoscp_stilt/mkdocs.yml                   |   1 +
 icoscp_stilt/src/icoscp_stilt/stiltobj.py | 340 +++++++++++-----------
 3 files changed, 172 insertions(+), 173 deletions(-)

diff --git a/icoscp/docs/changelog.md b/icoscp/docs/changelog.md
index c66bec2..8af24f8 100644
--- a/icoscp/docs/changelog.md
+++ b/icoscp/docs/changelog.md
@@ -1,5 +1,9 @@
 # Changelog
 
+## 0.2.1
+- #### dependencies
+    - Pin the versions of the library's dependencies.
+
 ## 0.2.0
 - #### cpauth module
     - Remove legacy authentication.
diff --git a/icoscp_stilt/mkdocs.yml b/icoscp_stilt/mkdocs.yml
index 9ebd704..3f4f357 100644
--- a/icoscp_stilt/mkdocs.yml
+++ b/icoscp_stilt/mkdocs.yml
@@ -5,6 +5,7 @@ nav:
     - About: index.md
     - Getting started with the new icoscp_stilt: getting_started.md
     - Legacy modules: modules.md
+    - Changelog: changelog.md
 
 theme:
     name: readthedocs
diff --git a/icoscp_stilt/src/icoscp_stilt/stiltobj.py b/icoscp_stilt/src/icoscp_stilt/stiltobj.py
index 8b92653..f874585 100644
--- a/icoscp_stilt/src/icoscp_stilt/stiltobj.py
+++ b/icoscp_stilt/src/icoscp_stilt/stiltobj.py
@@ -4,21 +4,27 @@
     Description:      Class that creates objects to set and get the attributes
                       of a station for which STILT model output is available for.
 """
+# Standard library imports.
+from pathlib import Path
+from typing import Any, List, Optional
+import json
 import os
+
+# Related third party imports.
+from icoscp_core.icos import meta
+from icoscp_core.queries.dataobjlist import SamplingHeightFilter
 import numpy as np
 import pandas as pd
 import requests
-import json
 import xarray as xr
-from . import const as CPC
-from typing import Any
-from . import timefuncs as tf
-from icoscp_core.queries.dataobjlist import SamplingHeightFilter
-from icoscp_core.icos import meta
+
+# Local application/library specific imports.
 from . import __version__ as release_version
+from . import const as c
+from . import timefuncs as tf
 
-class StiltStation():
 
+class StiltStation():
     """
     Attributes: id:              STILT station ID (e.g. 'HTM150')
                 locIdent:        String with latitude-longitude-altitude
@@ -39,12 +45,12 @@ class StiltStation():
     def __init__(self, st_dict: dict[str, Any]):
 
         # Object attributes:
-        self._path_fp = CPC.STILTFP     # Path to location where STILT footprints are stored
-        self._path_ts = CPC.STILTPATH   # Path to location where STILT time series are stored
-        self._url = CPC.STILTTS         # URL to STILT information
-        self.info = st_dict             # store the initial dict
-        self.valid = False              # if input is dictionary, this will be True
-        self.id = None                  # STILT ID for station (e.g. 'HTM150')
+        self._path_fp = c.STILTFP  # Path to location where STILT footprints are stored
+        self._path_ts = c.STILTPATH  # Path to location where STILT time series are stored
+        self._url = c.STILTTS  # URL to STILT information
+        self.info = st_dict  # store the initial dict
+        self.valid = False  # if input is dictionary, this will be True
+        self.id = None  # STILT ID for station (e.g. 'HTM150')
         self.lat = None
         self.lon = None
         self.alt = None
@@ -53,16 +59,18 @@ def __init__(self, st_dict: dict[str, Any]):
         self.icos: dict[str, Any] | None = None
         self.years = None
         self.geoinfo = None
-        self.dobjs_list = None               # Store a list of associated dobjs
-        self.dobjs_valid = False        # If True, dobjs sparql query already executed        
+        self.dobjs_list = None  # Store a list of associated dobjs
+        self.dobjs_valid = False  # If True, dobjs sparql query already executed
 
         self._set(st_dict)
 
-    #------------------------------------------------------------------------
+    # ------------------------------------------------------------------------
 
     def _set(self, st_dict: dict[str, Any]):
         #
-        if all(item in st_dict.keys() for item in ['id', 'lat', 'lon', 'alt', 'locIdent', 'name', 'icos', 'years', 'geoinfo']):
+        if all(item in st_dict.keys() for item in
+               ['id', 'lat', 'lon', 'alt', 'locIdent', 'name', 'icos', 'years',
+                'geoinfo']):
             self.id = st_dict['id']
             self.lat = st_dict['lat']
             self.lon = st_dict['lon']
@@ -74,9 +82,8 @@ def _set(self, st_dict: dict[str, Any]):
             self.geoinfo = st_dict['geoinfo']
             self.valid = True
 
-
     def __str__(self):
-        # by default called when a an 'object' is printed
+        # By default, called when an 'object' is printed
 
         out = {'id:': self.id,
                'name:': self.name,
@@ -86,8 +93,8 @@ def __str__(self):
                }
         return json.dumps(out)
 
-    #----------------------------------------------------------------------------------------------------------
-    def get_ts(self, start_date, end_date, hours=None, columns=None):
+    def get_ts(self, start_date, end_date, hours=None,
+               columns: Optional[str] = None):
         """
         STILT concentration time series for a given time period,
         with optional selection of specific hours and columns.
@@ -100,10 +107,19 @@ def get_ts(self, start_date, end_date, hours=None, columns=None):
         end_date : STR, FLOAT/INT (Unix timestamp), datetime object
             Example: end_date = '2018-01-31'
         hours : STR | INT, optional
-            If hours is empty or None, ALL Timeslots are returned.
-            [0, 3, 6, 9, 12 ,15, 18, 21]
+            If the hours argument is empty or None, ALL timeslots are
+            returned: [0, 3, 6, 9, 12, 15, 18, 21]
+        columns : Optional[str]
+            Valid entries are: 'default', 'co2', 'ch4', 'co', 'rn',
+            'wind', 'latlon', 'all'.
+            Using 'default', empty, or None will return:
+            ['isodate', 'co2.stilt', 'co2.bio', 'co2.fuel',
+            'co2.cement', 'co2.non_fuel', 'co2.background']
+            A full description of the 'columns' can be found at
+            https://icos-carbon-portal.github.io/pylib/icoscp_stilt/modules/#get_tsstart_date-end_date-hoursnone-columns
 
-        Valid results are returned as result with LOWER BOUND values.
+
+        Each requested hour is matched to its LOWER-BOUND timeslot.
         For backwards compatibility, input for str format hh:mm is
         accepted.
 
@@ -114,15 +130,6 @@ def get_ts(self, start_date, end_date, hours=None, columns=None):
         hours = [] return ALL
         hours = ['10', '10:00', 10] returns timeslot 9
 
-        columns : TYPE, optional
-            Valid entries are:
-            'default', 'co2', 'ch4', 'co', 'rn', 'wind', 'latlon', 'all'
-            Using 'default', empty, or None will return:
-            ['isodate', 'co2.stilt', 'co2.bio', 'co2.fuel',
-            'co2.cement', 'co2.background']
-            A full description of the 'columns' can be found at
-            https://icos-carbon-portal.github.io/pylib/modules/#stilt
-
         Returns
         -------
         Pandas Dataframe
@@ -138,69 +145,75 @@ def get_ts(self, start_date, end_date, hours=None, columns=None):
         if not hours:
             return False
         # Create an empty dataframe to store the timeseries:
-        df=pd.DataFrame({'A' : []})
-        # Add headers:
-        headers = {'Content-Type': 'application/json', 'Accept-Charset': 'UTF-8'}
+        df = pd.DataFrame({'A': []})
         # Create an empty list, to store the new time range with
         # available STILT model results:
-        new_range=[]
+        new_range = []
         # Create a pandas dataframe containing one column of datetime
         # objects with 3-hour intervals:
-        # date_range = pd.date_range(start_date, end_date+dt.timedelta(hours=24), freq='3H')
         date_range = pd.date_range(s_date, e_date, freq='3H')
-        # Loop through every Datetime object in the dataframe:
+        # Loop through every Datetime object in the dataframe and
+        # generate a list of Datetime objects for the STILT results
+        # that exist.
         for zDate in date_range:
-            # Check if STILT results exist:
-            if os.path.exists(self._path_fp + self.locIdent + '/' +
-                              str(zDate.year)+'/'+str(zDate.month).zfill(2)+'/'+
-                              str(zDate.year)+'x'+str(zDate.month).zfill(2)+'x'+str(zDate.day).zfill(2)+'x'+
-                              str(zDate.hour).zfill(2)+'/'):
-
-                # If STILT-results exist for the current Datetime
-                # object, append current Datetime object to list:
+            # Path may look like this:
+            # /data/stiltweb/slots/51.41Nx006.88Ex00200/2021/01/2021x01x01x00
+            if Path(
+                    self._path_fp + self.locIdent,
+                    str(zDate.year),
+                    str(zDate.month).zfill(2),
+                    str(zDate.year) + 'x' + str(zDate.month).zfill(2) + 'x' +
+                    str(zDate.day).zfill(2) + 'x' + str(zDate.hour).zfill(2)
+            ).exists():
                 new_range.append(zDate)
-        # If the list is not empty:
         if len(new_range) > 0:
-            # Assign the new time range to date_range:
             date_range = new_range
-            # Get new starting date:
-            fromDate = date_range[0].strftime('%Y-%m-%d')
-            # Get new ending date:
-            toDate = date_range[-1].strftime('%Y-%m-%d')
-            # Store the STILT result column names to a variable:
-            columns  = self.__columns(columns)
-            # Store the STILT result data column names to a variable:
-            data = '{"columns": '+columns+', "fromDate": "'+fromDate+'", "toDate": "'+toDate+'", "stationId": "'+self.id+'"}'
-            # Send request to get STILT results:
-            response = requests.post(self._url, headers=headers, data=data)
-            # Check if response is successful:
-            if response.status_code != 500:
-                # Get response in json-format and read it in to a numpy array:
-                output=np.asarray(response.json())
+            from_date = date_range[0].strftime('%Y-%m-%d')
+            to_date = date_range[-1].strftime('%Y-%m-%d')
+            columns = self.__columns(columns)
+            http_resp = requests.post(
+                url=self._url,
+                json={
+                    'stationId': self.id,
+                    'fromDate': from_date,
+                    'toDate': to_date,
+                    'columns': columns
+                },
+                timeout=c.HTTP_TIMEOUT_SEC
+            )
+            if http_resp.status_code == 200:
+                output = np.asarray(http_resp.json())
                 # Convert numpy array with STILT results to a pandas dataframe
-                cols = columns[1:-1].replace('"','').replace(' ','')
-                cols = list(cols.split(','))
-                df = pd.DataFrame(output[:,:], columns=cols)
-                # Replace 'null'-values with numpy NaN-values:
-                df = df.replace('null',np.NaN)
-                # Set dataframe data type to float:
+                df = pd.DataFrame(output[:, :], columns=columns)
+                df = df.replace('null', np.NaN)
                 df = df.astype(float)
-                # Convert the data type of the 'date'-column to Datetime Object:
+                # Create a 'date' column of Datetime objects from 'isodate'.
                 df['date'] = pd.to_datetime(df['isodate'], unit='s')
                 # Set 'date'-column as index:
-                df.set_index(['date'],inplace=True)
+                df.set_index(['date'], inplace=True)
                 # Filter dataframe values by timeslots:
                 hours = [str(h).zfill(2) for h in hours]
                 df = df.loc[df.index.strftime('%H').isin(hours)]
             else:
-                # Print message:
-                print("\033[0;31;1m Error...\nToo big STILT dataset!\nSelect data for a shorter time period.\n\n")
-
-        # track data usage
+                msg = (f'\033[0;31;1m'
+                       f'There was an error during the http request. To '
+                       f'reproduce it, run:\n'
+                       f'import requests\n\n'
+                       f'http_resp = requests.post(\n'
+                       f"\turl='{self._url}',\n"
+                       f'\tjson={{\n'
+                       f"\t\t'stationId': '{self.id}',\n"
+                       f"\t\t'fromDate': '{from_date}',\n"
+                       f"\t\t'toDate': '{to_date}',\n"
+                       f"\t\t'columns': {columns}\n"
+                       f'\t}},\n'
+                       f'\ttimeout={c.HTTP_TIMEOUT_SEC}\n'
+                       f')\n'
+                       f'print(http_resp.content)')
+                print(msg)
+        # Track data usage
         self.__portalUse('timeseries')
-        # Return dataframe:
         return df
-    #----------------------------------------------------------------------------------------------------------
 
     def get_fp(self, start_date, end_date, hours=None):
         """
@@ -290,8 +303,8 @@ def get_fp(self, start_date, end_date, hours=None):
 
     def get_raw(self, start_date, end_date, cols):
         """
-        Please do use this function with caution. Only very expirienced user
-        should load raw data.
+        Please use this function with caution. Only very experienced
+        users should load raw data.
 
         Parameters
         ----------
@@ -301,7 +314,7 @@ def get_raw(self, start_date, end_date, cols):
             End date in form yy-mm-dd.
         cols : LIST[STR]
             A list of valid column names. You can retrieve the full list
-            with _raw_clumn_names.
+            with _raw_column_names.
 
         Returns
         -------
@@ -329,32 +342,33 @@ def get_raw(self, start_date, end_date, cols):
 
         # create http header and payload:
 
-        headers = {'Content-Type': 'application/json', 'Accept-Charset': 'UTF-8'}
-        data = '{"columns": '+ str(columns) + ',"fromDate": "'+s_date+'", "toDate": "'+e_date+'", "stationId": "'+self.id+'"}'
-        response = requests.post(CPC.STILTRAW, headers=headers, data=data)
+        headers = {'Content-Type': 'application/json',
+                   'Accept-Charset': 'UTF-8'}
+        data = '{"columns": ' + str(
+            columns) + ',"fromDate": "' + s_date + '", "toDate": "' + e_date + '", "stationId": "' + self.id + '"}'
+        response = requests.post(c.STILTRAW, headers=headers, data=data)
 
         if response.status_code != 500:
-
             # Get response in json-format and read it in to a numpy array:
-            output=np.asarray(response.json())
+            output = np.asarray(response.json())
 
             # Convert numpy array with STILT results to a pandas dataframe
-            cols = columns[1:-1].replace('"','').replace(' ','')
+            cols = columns[1:-1].replace('"', '').replace(' ', '')
             cols = list(cols.split(','))
-            df = pd.DataFrame(output[:,:], columns=cols)
+            df = pd.DataFrame(output[:, :], columns=cols)
 
             # Replace 'null'-values with numpy NaN-values:
-            df = df.replace('null',np.NaN)
+            df = df.replace('null', np.NaN)
 
             # Set dataframe data type to float:
             df = df.astype(float)
 
             # Convert the data type of the 'date'-column to Datetime Object:
-            df['date'] = pd.to_datetime(df['isodate'], unit='s')
-
-            # Set 'date'-column as index:
-            df.set_index(['date'],inplace=True)
+            if 'isodate' in df.columns:
+                df['date'] = pd.to_datetime(df['isodate'], unit='s')
 
+                # Set 'date'-column as index:
+                df.set_index(['date'], inplace=True)
 
         # track data usage
         self.__portalUse('timeseries')
@@ -390,96 +404,76 @@ def get_dobj_list(self):
                 'timeStart': dobj.time_start,
                 'timeEnd': dobj.time_end
             } for dobj in meta.list_data_objects(
-                datatype=[CPC.OBS_SPEC_CO2, CPC.OBS_SPEC_CH4],
-                station=CPC.ICOS_STATION_PREFIX + self.icos['stationId'],
+                datatype=[c.CP_OBSPACK_CO2_SPEC, c.CP_OBSPACK_CH4_SPEC],
+                station=c.ICOS_STATION_PREFIX + self.icos['stationId'],
                 filters=[SamplingHeightFilter("=", float(self.icos['SamplingHeight']))]
             )]
             self.dobjs_valid = True
         return self.dobjs_list
 
-
-    def __columns(self, cols):
+    def __columns(self, columns: Optional[str] = None) -> Optional[List]:
         # Function that checks the selection of columns that are to be
         # returned with the STILT timeseries model output:
-        if cols:
-            # Convert user-specified columns to lower case.
-            cols = cols.lower()
-
-        # check for a valid entry. If not...return default
-        valid = ["default", "co2", "ch4", "co", "rn", "wind", "latlon", "all"]
-        if cols not in valid:
-            cols = 'default'
+        columns = columns.lower() if columns else None
+        valid = ['default', 'co2', 'ch4', 'co', 'rn', 'wind', 'latlon', 'all']
+        columns = 'default' if columns not in valid else columns
 
         # Check columns-input:
-        if cols=='default':
-            columns = ('["isodate","co2.stilt","co2.bio","co2.fuel",'+
-                       '"co2.cement","co2.non_fuel",'+
-                       '"co2.background"]')
-
-        elif cols=='co2':
-            columns = ('["isodate","co2.stilt","co2.bio","co2.fuel",'+
-                       '"co2.cement","co2.non_fuel",'+
-                       '"co2.bio.gee","co2.bio.resp",' +
-                       '"co2.fuel.coal","co2.fuel.oil","co2.fuel.gas",'+
-                       '"co2.fuel.bio","co2.fuel.waste",'+
-                       '"co2.energy","co2.transport","co2.industry",'+
-                       '"co2.residential","co2.other_categories",'+
-                       '"co2.background"]')
-
-        elif cols=='co':
-            columns = ('["isodate", "co.stilt","co.fuel",'+
-                       '"co.cement","co.non_fuel",'+
-                       '"co.fuel.coal","co.fuel.oil","co.fuel.gas",'+
-                       '"co.fuel.bio","co.fuel.waste",'+
-                       '"co.energy","co.transport","co.industry",'+
-                       '"co.residential","co.other_categories",'+
-                       '"co.background"]')
-
-        elif cols=='ch4':
-            columns = ('["isodate", "ch4.stilt",'+
-                       '"ch4.anthropogenic","ch4.natural",'+
-                       '"ch4.agriculture","ch4.waste",'+
-                       '"ch4.energy","ch4.other_categories",'+
-                       '"ch4.wetlands","ch4.soil_uptake",'+
-                       '"ch4.wildfire","ch4.other_natural",'+                       
-                       '"ch4.background"]')
-
-        elif cols=='rn':
-            columns = ('["isodate", "rn", "rn.era", "rn.noah"]')
-
-        elif cols=='wind':
-            columns = ('["isodate", "wind.dir", "wind.u", "wind.v"]')
-
-        elif cols=='latlon':
-            columns = ('["isodate", "latstart", "lonstart"]')
-
-        elif cols=='all':
-            columns = ('["isodate","co2.stilt","co2.bio","co2.fuel",'+
-                       '"co2.cement","co2.non_fuel",'+
-                       '"co2.bio.gee", "co2.bio.resp",' +
-                       '"co2.fuel.coal","co2.fuel.oil","co2.fuel.gas",'+
-                       '"co2.fuel.bio","co2.fuel.waste",'+
-                       '"co2.energy","co2.transport", "co2.industry",'+
-                       '"co2.residential","co2.other_categories",'+
-                       '"co2.background",'+
-                       '"co.stilt","co.fuel","co.cement","co.non_fuel",'+
-                       '"isodate", "ch4.stilt",'+
-                       '"ch4.anthropogenic","ch4.natural",'+
-                       '"ch4.agriculture","ch4.waste",'+
-                       '"ch4.energy","ch4.other_categories",'+
-                       '"ch4.wetlands","ch4.soil_uptake",'+
-                       '"ch4.wildfire","ch4.other_natural",'+                       
-                       '"ch4.background",'+
-                       '"co.fuel.coal","co.fuel.oil","co.fuel.gas",'+
-                       '"co.fuel.bio","co.fuel.waste",'+
-                       '"co.energy","co.transport","co.industry",'+
-                       '"co.residential","co.other_categories",'+
-                       '"co.background",'+
-                       '"rn", "rn.era","rn.noah",'+
-                       '"wind.dir","wind.u","wind.v",'+
-                       '"latstart","lonstart"]')
-
-        # Return variable:
+        if columns == 'default':
+            columns = ['isodate', 'co2.stilt', 'co2.bio', 'co2.fuel',
+                       'co2.cement', 'co2.non_fuel', 'co2.background']
+
+        elif columns == 'co2':
+            columns = ['isodate', 'co2.stilt', 'co2.bio', 'co2.fuel',
+                       'co2.cement', 'co2.non_fuel', 'co2.bio.gee',
+                       'co2.bio.resp', 'co2.fuel.coal', 'co2.fuel.oil',
+                       'co2.fuel.gas', 'co2.fuel.bio', 'co2.fuel.waste',
+                       'co2.energy', 'co2.transport', 'co2.industry',
+                       'co2.residential', 'co2.other_categories',
+                       'co2.background']
+
+        elif columns == 'co':
+            columns = ['isodate', 'co.stilt', 'co.fuel', 'co.cement',
+                       'co.non_fuel', 'co.fuel.coal', 'co.fuel.oil',
+                       'co.fuel.gas', 'co.fuel.bio', 'co.fuel.waste',
+                       'co.energy', 'co.transport', 'co.industry',
+                       'co.residential', 'co.other_categories',
+                       'co.background']
+
+        elif columns == 'ch4':
+            columns = ['isodate', 'ch4.stilt', 'ch4.anthropogenic',
+                       'ch4.natural', 'ch4.agriculture', 'ch4.waste',
+                       'ch4.energy', 'ch4.other_categories', 'ch4.wetlands',
+                       'ch4.soil_uptake', 'ch4.wildfire', 'ch4.other_natural',
+                       'ch4.background']
+
+        elif columns == 'rn':
+            columns = ['isodate', 'rn', 'rn.era', 'rn.noah']
+
+        elif columns == 'wind':
+            columns = ['isodate', 'wind.dir', 'wind.u', 'wind.v']
+
+        elif columns == 'latlon':
+            columns = ['isodate', 'latstart', 'lonstart']
+
+        elif columns == 'all':
+            columns = ['isodate', 'co2.stilt', 'co2.bio', 'co2.fuel',
+                       'co2.cement', 'co2.non_fuel', 'co2.bio.gee',
+                       'co2.bio.resp', 'co2.fuel.coal', 'co2.fuel.oil',
+                       'co2.fuel.gas', 'co2.fuel.bio', 'co2.fuel.waste',
+                       'co2.energy', 'co2.transport', 'co2.industry',
+                       'co2.residential', 'co2.other_categories',
+                       'co2.background', 'co.stilt', 'co.fuel', 'co.cement',
+                       'co.non_fuel', 'isodate', 'ch4.stilt',
+                       'ch4.anthropogenic', 'ch4.natural', 'ch4.agriculture',
+                       'ch4.waste', 'ch4.energy', 'ch4.other_categories',
+                       'ch4.wetlands', 'ch4.soil_uptake', 'ch4.wildfire',
+                       'ch4.other_natural', 'ch4.background', 'co.fuel.coal',
+                       'co.fuel.oil', 'co.fuel.gas', 'co.fuel.bio',
+                       'co.fuel.waste', 'co.energy', 'co.transport',
+                       'co.industry', 'co.residential', 'co.other_categories',
+                       'co.background', 'rn', 'rn.era', 'rn.noah', 'wind.dir',
+                       'wind.u', 'wind.v', 'latstart', 'lonstart']
         return columns
 
     def __portalUse(self, dtype):