From c6f49c6c01fb8f9f46d48ef2f9dcb7e342ce262b Mon Sep 17 00:00:00 2001 From: Jacob Mims <122570226+jtmims@users.noreply.github.com> Date: Tue, 23 Apr 2024 11:06:56 -0400 Subject: [PATCH 1/4] add start_time and end_time attrs to catalog_builder.py --- tools/catalog_builder/catalog_builder.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tools/catalog_builder/catalog_builder.py b/tools/catalog_builder/catalog_builder.py index a83ebe1ab..de5c8c6a1 100644 --- a/tools/catalog_builder/catalog_builder.py +++ b/tools/catalog_builder/catalog_builder.py @@ -73,6 +73,8 @@ def parse_gfdl_pp_ts(file_name: str): cell_methods = "" cell_measures = "" time_range = split[1] + start_time = time_range.split('-')[0] + end_time = time_range.split('-')[1] variable_id = split[2] source_type = "" member_id = "" @@ -141,6 +143,8 @@ def parse_gfdl_pp_ts(file_name: str): 'grid_label': grid_label, 'units': units, 'time_range': time_range, + 'start_time': start_time, + 'end_time': end_time, 'chunk_freq': chunk_freq, 'standard_name': standard_name, 'long_name': long_name, From ddd7b472dc85364afa5a45b82d2db0ef8845100d Mon Sep 17 00:00:00 2001 From: Jacob Mims <122570226+jtmims@users.noreply.github.com> Date: Fri, 26 Apr 2024 12:04:11 -0400 Subject: [PATCH 2/4] add int to date conversion in check_group_daterange in preprocessor.py --- src/preprocessor.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/preprocessor.py b/src/preprocessor.py index 01b2664ac..6ff72873e 100644 --- a/src/preprocessor.py +++ b/src/preprocessor.py @@ -797,6 +797,16 @@ def check_group_daterange(self, group_df: pd.DataFrame, log=_log) -> pd.DataFram """ date_col = "date_range" try: + # convert int to date type + date_format = '' + date_digits = math.floor(math.log10(group_df['start_time'].values[0]))+1 + match date_digits: + case 8: + date_format = '%Y%m%d' + case 14: + date_format = '%Y%m%d%H%M%S' + group_df['start_time'] = pd.to_datetime(group_df['start_time'].values[0], format=date_format) + group_df['end_time'] = pd.to_datetime(group_df['end_time'].values[0], format=date_format) # method throws ValueError if ranges aren't contiguous dates_df = group_df.loc[:, ['start_time', 'end_time']] date_range_vals = [] From 58aafd8d8cd04de9b4c701c97d9ba9c0627e8cb5 Mon Sep 17 00:00:00 2001 From: Jacob Mims <122570226+jtmims@users.noreply.github.com> Date: Fri, 26 Apr 2024 15:18:18 -0400 Subject: [PATCH 3/4] add conditional to check for date type --- src/preprocessor.py | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/src/preprocessor.py b/src/preprocessor.py index 6ff72873e..48284aa60 100644 --- a/src/preprocessor.py +++ b/src/preprocessor.py @@ -797,16 +797,17 @@ def check_group_daterange(self, group_df: pd.DataFrame, log=_log) -> pd.DataFram """ date_col = "date_range" try: - # convert int to date type - date_format = '' - date_digits = math.floor(math.log10(group_df['start_time'].values[0]))+1 - match date_digits: - case 8: - date_format = '%Y%m%d' - case 14: - date_format = '%Y%m%d%H%M%S' - group_df['start_time'] = pd.to_datetime(group_df['start_time'].values[0], format=date_format) - group_df['end_time'] = pd.to_datetime(group_df['end_time'].values[0], format=date_format) + if not isinstance(group_df['start_time'], datetime.date): + # convert int to date type + date_format = '' + date_digits = math.floor(math.log10(group_df['start_time'].values[0]))+1 + match date_digits: + case 8: + date_format = '%Y%m%d' + case 14: + date_format = '%Y%m%d%H%M%S' + group_df['start_time'] = pd.to_datetime(group_df['start_time'].values[0], format=date_format) + group_df['end_time'] = pd.to_datetime(group_df['end_time'].values[0], format=date_format) # method throws ValueError if ranges aren't contiguous dates_df = group_df.loc[:, ['start_time', 'end_time']] date_range_vals = [] From 92d09b5f8ca8f87aae95bbba40932e08217d2cfb Mon Sep 17 00:00:00 2001 From: Jacob Mims <122570226+jtmims@users.noreply.github.com> Date: Fri, 26 Apr 2024 15:19:53 -0400 Subject: [PATCH 4/4] point to pd value --- src/preprocessor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/preprocessor.py b/src/preprocessor.py index 48284aa60..960f8e25f 100644 --- a/src/preprocessor.py +++ b/src/preprocessor.py @@ -797,7 +797,7 @@ def check_group_daterange(self, group_df: pd.DataFrame, log=_log) -> pd.DataFram """ date_col = "date_range" try: - if not isinstance(group_df['start_time'], datetime.date): + if not isinstance(group_df['start_time'].values[0], datetime.date): # convert int to date type date_format = '' date_digits = math.floor(math.log10(group_df['start_time'].values[0]))+1