From 054908790191e37cebf901b4a2bbd0d1189cf6aa Mon Sep 17 00:00:00 2001
From: Rob Hanna
Date: Thu, 16 Feb 2023 18:01:09 +0000
Subject: [PATCH 01/11] Remove FIM3 and most references to GMS

---
 ...nittests.lst => deny_branch_unittests.lst} |   2 +-
 ...s_branch_zero.lst => deny_branch_zero.lst} |   2 +-
 ...eny_gms_branches.lst => deny_branches.lst} |   2 +-
 config/{deny_gms_unit.lst => deny_unit.lst}   |   2 +-
 config/params_template.env                    |   2 +-
 data/acquire_and_preprocess_inputs.py         | 397 ------------------
 data/nws/preprocess_ahps_nws.py               |  18 +
 data/usgs/acquire_and_preprocess_3dep_dems.py |   2 +-
 fim_post_processing.sh                        |  16 +-
 fim_pre_processing.sh                         |  20 +-
 gms_pipeline.sh                               | 208 ---------
 gms_run_branch.sh                             | 244 -----------
 gms_run_post_processing.sh                    | 184 --------
 gms_run_unit.sh                               | 208 ---------
 src/add_crosswalk.py                          | 162 ++++---
 src/bash_variables.env                        |   1 -
 src/{gms => }/buffer_stream_branches.py       |   0
 src/{gms => }/clip_rasters_to_branches.py     |   0
 src/{gms => }/crosswalk_nwm_demDerived.py     |   4 +-
 .../delineate_hydros_and_produce_HAND.sh      |   6 +-
 src/{gms => }/derive_level_paths.py           |   6 +-
 src/{gms => }/edit_points.py                  |   0
 ...puts_by_huc.py => filter_inputs_by_huc.py} |  18 +-
 src/{gms => }/finalize_srcs.py                |   0
 src/generate_branch_list.py                   |   2 +-
 src/gms/__init__.py                           |   0
 src/gms/aggregate_branch_lists.py             |  57 ---
 src/gms/generate_branch_list.py               |  36 --
 src/gms/remove_error_branches.py              |  96 -----
 src/gms/run_by_unit.sh                        | 272 ------------
 src/gms/test_new_crosswalk.sh                 |   4 -
 src/gms/time_and_tee_run_by_branch.sh         |  29 --
 src/gms/time_and_tee_run_by_unit.sh           |  29 --
 src/{gms => }/make_rem.py                     |   0
 src/{gms => }/mask_dem.py                     |   0
 src/output_cleanup.py                         | 117 ------
 src/{gms => }/outputs_cleanup.py              |   0
 src/process_branch.sh                         |   6 +-
 .../query_vectors_by_branch_polygons.py       |   0
 src/{gms => }/reset_mannings.py               |   2 +-
 src/{gms => }/run_by_branch.sh                |   6 +-
 src/run_unit_wb.sh                            |  14 +-
 src/{gms => }/stream_branches.py              |   0
 .../subset_catch_list_by_branch_id.py         |   0
 src/{gms => }/toDo.md                         |   0
 src/usgs_gage_aggregate.py                    |  12 +-
 src/usgs_gage_unit_setup.py                   |  27 +-
 src/utils/fim_enums.py                        |   4 +-
 .../combine_crosswalk_tables.py               |   0
 .../compare_ms_and_non_ms_metrics.py          |   0
 tools/{gms_tools => }/compile_comp_stats.py   |  17 +
 .../compile_computational_stats.py            |  18 +-
 tools/composite_inundation.py                 |  31 +-
 tools/consolidate_metrics.py                  |  17 +
 tools/copy_test_case_folders.py               |  18 +-
 tools/eval_plots.py                           |  12 +-
 tools/{gms_tools => }/evaluate_continuity.py  |   0
 .../find_max_catchment_breadth.py             |   0
 tools/generate_categorical_fim_mapping.py     |   4 +-
 tools/gms_tools/__init__.py                   |   0
 tools/{gms_tools => }/inundate_gms.py         |  16 +-
 tools/inundate_nation.py                      |  63 ++-
 tools/inundation.py                           |   6 +-
 tools/make_boxes_from_bounds.py               |   4 +-
 tools/{gms_tools => }/mosaic_inundation.py    |  10 +-
 .../{gms_tools => }/overlapping_inundation.py |   0
 tools/{gms_tools => }/plots.py                |   0
 tools/run_test_case.py                        | 110 ++---
 tools/synthesize_test_cases.py                | 110 +----
 unit_tests/README.md                          |  18 +-
 unit_tests/__template_unittests.py            |   2 +-
 unit_tests/check_unit_errors_params.json      |   4 +-
 unit_tests/check_unit_errors_unittests.py     |   4 +-
 .../{gms => }/derive_level_paths_params.json  |   0
 .../{gms => }/derive_level_paths_unittests.py |   4 +-
 ...catchments_and_add_attributes_unittests.py |   4 +-
 .../{gms => }/outputs_cleanup_params.json     |   4 +-
 .../{gms => }/outputs_cleanup_unittests.py    |   2 +-
 unit_tests/split_flows_unittests.py           |   4 +-
 .../{gms_tools => }/inundate_gms_params.json  |   0
 .../{gms_tools => }/inundate_gms_unittests.py |   2 +-
 81 files changed, 400 insertions(+), 2301 deletions(-)
 rename
config/{deny_gms_branch_unittests.lst => deny_branch_unittests.lst} (97%) rename config/{deny_gms_branch_zero.lst => deny_branch_zero.lst} (97%) rename config/{deny_gms_branches.lst => deny_branches.lst} (97%) rename config/{deny_gms_unit.lst => deny_unit.lst} (92%) delete mode 100755 data/acquire_and_preprocess_inputs.py delete mode 100755 gms_pipeline.sh delete mode 100755 gms_run_branch.sh delete mode 100755 gms_run_post_processing.sh delete mode 100755 gms_run_unit.sh rename src/{gms => }/buffer_stream_branches.py (100%) rename src/{gms => }/clip_rasters_to_branches.py (100%) rename src/{gms => }/crosswalk_nwm_demDerived.py (99%) rename src/{gms => }/delineate_hydros_and_produce_HAND.sh (94%) rename src/{gms => }/derive_level_paths.py (97%) rename src/{gms => }/edit_points.py (100%) rename src/{gms/filter_gms_inputs_by_huc.py => filter_inputs_by_huc.py} (52%) rename src/{gms => }/finalize_srcs.py (100%) delete mode 100644 src/gms/__init__.py delete mode 100755 src/gms/aggregate_branch_lists.py delete mode 100755 src/gms/generate_branch_list.py delete mode 100755 src/gms/remove_error_branches.py delete mode 100755 src/gms/run_by_unit.sh delete mode 100755 src/gms/test_new_crosswalk.sh delete mode 100755 src/gms/time_and_tee_run_by_branch.sh delete mode 100755 src/gms/time_and_tee_run_by_unit.sh rename src/{gms => }/make_rem.py (100%) rename src/{gms => }/mask_dem.py (100%) delete mode 100755 src/output_cleanup.py rename src/{gms => }/outputs_cleanup.py (100%) rename src/{gms => }/query_vectors_by_branch_polygons.py (100%) rename src/{gms => }/reset_mannings.py (99%) rename src/{gms => }/run_by_branch.sh (89%) rename src/{gms => }/stream_branches.py (100%) rename src/{gms => }/subset_catch_list_by_branch_id.py (100%) rename src/{gms => }/toDo.md (100%) rename tools/{gms_tools => }/combine_crosswalk_tables.py (100%) rename tools/{gms_tools => }/compare_ms_and_non_ms_metrics.py (100%) rename tools/{gms_tools => }/compile_comp_stats.py (81%) rename tools/{gms_tools => }/compile_computational_stats.py (64%) rename tools/{gms_tools => }/evaluate_continuity.py (100%) rename tools/{gms_tools => }/find_max_catchment_breadth.py (100%) delete mode 100644 tools/gms_tools/__init__.py rename tools/{gms_tools => }/inundate_gms.py (94%) rename tools/{gms_tools => }/mosaic_inundation.py (94%) rename tools/{gms_tools => }/overlapping_inundation.py (100%) rename tools/{gms_tools => }/plots.py (100%) rename unit_tests/{gms => }/derive_level_paths_params.json (100%) rename unit_tests/{gms => }/derive_level_paths_unittests.py (98%) rename unit_tests/{gms => }/outputs_cleanup_params.json (80%) rename unit_tests/{gms => }/outputs_cleanup_unittests.py (99%) rename unit_tests/tools/{gms_tools => }/inundate_gms_params.json (100%) rename unit_tests/tools/{gms_tools => }/inundate_gms_unittests.py (98%) diff --git a/config/deny_gms_branch_unittests.lst b/config/deny_branch_unittests.lst similarity index 97% rename from config/deny_gms_branch_unittests.lst rename to config/deny_branch_unittests.lst index cb947ee1c..5e9a0968e 100644 --- a/config/deny_gms_branch_unittests.lst +++ b/config/deny_branch_unittests.lst @@ -1,4 +1,4 @@ -# List of files for gms branches to delete +# List of files for branches to delete # Use comment to allow list the file # Use curly braces to denote branch_id agree_bufgrid.tif diff --git a/config/deny_gms_branch_zero.lst b/config/deny_branch_zero.lst similarity index 97% rename from config/deny_gms_branch_zero.lst rename to config/deny_branch_zero.lst index 3bda34079..01bfaee85 100644 --- 
a/config/deny_gms_branch_zero.lst +++ b/config/deny_branch_zero.lst @@ -1,4 +1,4 @@ -# List of files for gms branches to delete +# List of files for branch zero to delete # Use comment to allow list the file # Use curly braces to denote branch_id agree_bufgrid.tif diff --git a/config/deny_gms_branches.lst b/config/deny_branches.lst similarity index 97% rename from config/deny_gms_branches.lst rename to config/deny_branches.lst index b1edb0261..9629d0317 100644 --- a/config/deny_gms_branches.lst +++ b/config/deny_branches.lst @@ -1,4 +1,4 @@ -# List of files for gms branches to delete +# List of files for branches to delete # Use comment to allow list the file # Use curly braces to denote branch_id agree_bufgrid.tif diff --git a/config/deny_gms_unit.lst b/config/deny_unit.lst similarity index 92% rename from config/deny_gms_unit.lst rename to config/deny_unit.lst index 0ad10cc82..b4680544e 100644 --- a/config/deny_gms_unit.lst +++ b/config/deny_unit.lst @@ -1,4 +1,4 @@ -# List of files for gms branches to delete +# List of files for units to delete # Use comment to allow list the file #branch_id.lst #branch_polygons.gpkg diff --git a/config/params_template.env b/config/params_template.env index 1895e6a25..cfd4248a3 100644 --- a/config/params_template.env +++ b/config/params_template.env @@ -20,7 +20,7 @@ export slope_min=0.001 export min_catchment_area=0.25 export min_stream_length=0.5 -#### gms parameters #### +#### branch parameters #### export branch_id_attribute=levpa_id export branch_buffer_distance_meters=7000 export branch_timeout=4000 # pass int or float. To make a percentage of median, pass a '%' at the end. diff --git a/data/acquire_and_preprocess_inputs.py b/data/acquire_and_preprocess_inputs.py deleted file mode 100755 index 37cb33e3d..000000000 --- a/data/acquire_and_preprocess_inputs.py +++ /dev/null @@ -1,397 +0,0 @@ -#!/usr/bin/env python3 - -import os -import argparse -import csv -import sys -sys.path.append('/foss_fim/src') -import shutil -from multiprocessing import Pool -import geopandas as gpd -from urllib.error import HTTPError -from tqdm import tqdm - -from utils.shared_variables import (NHD_URL_PARENT, - NHD_URL_PREFIX, - NHD_RASTER_URL_SUFFIX, - NHD_VECTOR_URL_SUFFIX, - NHD_VECTOR_EXTRACTION_PREFIX, - NHD_VECTOR_EXTRACTION_SUFFIX, - PREP_PROJECTION, - WBD_NATIONAL_URL, - FIM_ID, - OVERWRITE_WBD, - OVERWRITE_NHD, - OVERWRITE_ALL, - nhd_raster_url_template, - nhd_vector_url_template - ) - -from utils.shared_functions import (pull_file, run_system_command, - subset_wbd_gpkg, delete_file, - getDriver) - -NHDPLUS_VECTORS_DIRNAME = 'nhdplus_vectors' -NHDPLUS_RASTERS_DIRNAME = 'nhdplus_rasters' -NWM_HYDROFABRIC_DIRNAME = 'nwm_hydrofabric' -NWM_FILE_TO_SUBSET_WITH = 'nwm_flows.gpkg' - -def subset_wbd_to_nwm_domain(wbd,nwm_file_to_use): - - intersecting_indices = [not (gpd.read_file(nwm_file_to_use,mask=b).empty) for b in wbd.geometry] - - return(wbd[intersecting_indices]) - -def pull_and_prepare_wbd(path_to_saved_data_parent_dir,nwm_dir_name,nwm_file_to_use,overwrite_wbd,num_workers): - """ - This helper function pulls and unzips Watershed Boundary Dataset (WBD) data. It uses the WBD URL defined by WBD_NATIONAL_URL. - This function also subsets the WBD layers (HU4, HU6, HU8) to CONUS and converts to geopkacage layers. - - Args: - path_to_saved_data_parent_dir (str): The system path to where the WBD will be downloaded, unzipped, and preprocessed. - - """ - - # Construct path to wbd_directory and create if not existent. 
- wbd_directory = os.path.join(path_to_saved_data_parent_dir, 'wbd') - if not os.path.exists(wbd_directory): - os.mkdir(wbd_directory) - - wbd_gdb_path = os.path.join(wbd_directory, 'WBD_National_GDB.gdb') - pulled_wbd_zipped_path = os.path.join(wbd_directory, 'WBD_National_GDB.zip') - - multilayer_wbd_geopackage = os.path.join(wbd_directory, 'WBD_National.gpkg') - - nwm_huc_list_file_template = os.path.join(wbd_directory,'nwm_wbd{}.csv') - - nwm_file_to_use = os.path.join(path_to_saved_data_parent_dir,nwm_dir_name,nwm_file_to_use) - if not os.path.isfile(nwm_file_to_use): - raise IOError("NWM File to Subset Too Not Available: {}".format(nwm_file_to_use)) - - if not os.path.exists(multilayer_wbd_geopackage) or overwrite_wbd: - # Download WBD and unzip if it's not already done. - if not os.path.exists(wbd_gdb_path): - if not os.path.exists(pulled_wbd_zipped_path): - pull_file(WBD_NATIONAL_URL, pulled_wbd_zipped_path) - os.system("7za x {pulled_wbd_zipped_path} -o{wbd_directory}".format(pulled_wbd_zipped_path=pulled_wbd_zipped_path, wbd_directory=wbd_directory)) - - procs_list, wbd_gpkg_list = [], [] - multilayer_wbd_geopackage = os.path.join(wbd_directory, 'WBD_National.gpkg') - # Add fimid to HU8, project, and convert to geopackage. - if os.path.isfile(multilayer_wbd_geopackage): - os.remove(multilayer_wbd_geopackage) - print("Making National WBD GPKG...") - print("\tWBDHU8") - wbd_hu8 = gpd.read_file(wbd_gdb_path, layer='WBDHU8') - wbd_hu8 = wbd_hu8.rename(columns={'huc8':'HUC8'}) # rename column to caps - wbd_hu8 = wbd_hu8.sort_values('HUC8') - fimids = [str(item).zfill(4) for item in list(range(1000, 1000 + len(wbd_hu8)))] - wbd_hu8[FIM_ID] = fimids - wbd_hu8 = wbd_hu8.to_crs(PREP_PROJECTION) # Project. - wbd_hu8 = subset_wbd_to_nwm_domain(wbd_hu8,nwm_file_to_use) - wbd_hu8.geometry = wbd_hu8.buffer(0) - wbd_hu8.to_file(multilayer_wbd_geopackage,layer='WBDHU8',driver=getDriver(multilayer_wbd_geopackage),index=False) # Save. - wbd_hu8.HUC8.to_csv(nwm_huc_list_file_template.format('8'),index=False,header=False) - #wbd_gpkg_list.append(os.path.join(wbd_directory, 'WBDHU8.gpkg')) # Append to wbd_gpkg_list for subsetting later. - del wbd_hu8 - - # Prepare procs_list for multiprocessed geopackaging. - for wbd_layer_num in ['4', '6']: - wbd_layer = 'WBDHU' + wbd_layer_num - print("\t{}".format(wbd_layer)) - wbd = gpd.read_file(wbd_gdb_path,layer=wbd_layer) - wbd = wbd.to_crs(PREP_PROJECTION) - wbd = wbd.rename(columns={'huc'+wbd_layer_num : 'HUC' + wbd_layer_num}) - wbd = subset_wbd_to_nwm_domain(wbd,nwm_file_to_use) - wbd.geometry = wbd.buffer(0) - wbd.to_file(multilayer_wbd_geopackage,layer=wbd_layer,driver=getDriver(multilayer_wbd_geopackage),index=False) - wbd['HUC{}'.format(wbd_layer_num)].to_csv(nwm_huc_list_file_template.format(wbd_layer_num),index=False,header=False) - #output_gpkg = os.path.join(wbd_directory, wbd_layer + '.gpkg') - #wbd_gpkg_list.append(output_gpkg) - #procs_list.append(['ogr2ogr -overwrite -progress -f GPKG -t_srs "{projection}" {output_gpkg} {wbd_gdb_path} {wbd_layer}'.format(output_gpkg=output_gpkg, wbd_gdb_path=wbd_gdb_path, wbd_layer=wbd_layer, projection=PREP_PROJECTION)]) - - # with Pool(processes=num_workers) as pool: - # pool.map(run_system_command, procs_list) - - # Subset WBD layers to CONUS and add to single geopackage. 
- #print("Subsetting WBD layers to CONUS...") - #multilayer_wbd_geopackage = os.path.join(wbd_directory, 'WBD_National.gpkg') - #for gpkg in wbd_gpkg_list: - # subset_wbd_gpkg(gpkg, multilayer_wbd_geopackage) - - # Clean up temporary files. - #for temp_layer in ['WBDHU4', 'WBDHU6', 'WBDHU8']: - # delete_file(os.path.join(wbd_directory, temp_layer + '.gpkg')) - #pulled_wbd_zipped_path = os.path.join(wbd_directory, 'WBD_National_GDB.zip') - #delete_file(pulled_wbd_zipped_path) - #delete_file(os.path.join(wbd_directory, 'WBD_National_GDB.jpg')) - - return(wbd_directory) - -def pull_and_prepare_nwm_hydrofabric(path_to_saved_data_parent_dir, path_to_preinputs_dir,num_workers): - """ - This helper function pulls and unzips NWM hydrofabric data. It uses the NWM hydrofabric URL defined by NWM_HYDROFABRIC_URL. - - Args: - path_to_saved_data_parent_dir (str): The system path to where a 'nwm' subdirectory will be created and where NWM hydrofabric - will be downloaded, unzipped, and preprocessed. - - """ - - # -- Acquire and preprocess NWM data -- # - nwm_hydrofabric_directory = os.path.join(path_to_saved_data_parent_dir, 'nwm_hydrofabric') - if not os.path.exists(nwm_hydrofabric_directory): - os.mkdir(nwm_hydrofabric_directory) - - nwm_hydrofabric_gdb = os.path.join(path_to_preinputs_dir, 'nwm_v21.gdb') - - # Project and convert to geopackage. - print("Projecting and converting NWM layers to geopackage...") - procs_list = [] - for nwm_layer in ['nwm_flows', 'nwm_lakes', 'nwm_catchments']: # I had to project the catchments and waterbodies because these 3 layers had varying CRSs. - print("Operating on " + nwm_layer) - output_gpkg = os.path.join(nwm_hydrofabric_directory, nwm_layer + '_proj.gpkg') - procs_list.append(['ogr2ogr -overwrite -progress -f GPKG -t_srs "{projection}" {output_gpkg} {nwm_hydrofabric_gdb} {nwm_layer}'.format(projection=PREP_PROJECTION, output_gpkg=output_gpkg, nwm_hydrofabric_gdb=nwm_hydrofabric_gdb, nwm_layer=nwm_layer)]) - - with Pool(processes=num_workers) as pool: - pool.map(run_system_command, procs_list) - - -def pull_and_prepare_nhd_data(nhd_raster_download_url, - nhd_raster_extraction_path, - nhd_vector_download_url, - nhd_vector_extraction_path, - overwrite_nhd_dem, - overwrite_nhd_gdb - ): - """ - This helper function is designed to be multiprocessed. It pulls and unzips NHD raster and vector data. - Args: - args (list): A list of arguments in this format: [nhd_raster_download_url, nhd_raster_extraction_path, nhd_vector_download_url, nhd_vector_extraction_path] - """ - # Update extraction path from .zip to .gdb - nhd_gdb = nhd_vector_extraction_path.replace('.zip', '.gdb') - - # Download raster and vector, if not already in user's directory (exist check performed by pull_file()). 
- nhd_raster_extraction_parent = os.path.dirname(nhd_raster_extraction_path) - huc = os.path.basename(nhd_raster_extraction_path).split('_')[2] - - nhd_raster_parent_dir = os.path.join(nhd_raster_extraction_parent, 'HRNHDPlusRasters' + huc) - - if not os.path.exists(nhd_raster_parent_dir): - os.mkdir(nhd_raster_parent_dir) - - elev_cm_tif = os.path.join(nhd_raster_parent_dir, 'elev_cm.tif') - elev_m_tif = os.path.join(nhd_raster_parent_dir, 'elev_m.tif') - if not os.path.exists(elev_cm_tif) or overwrite_nhd_dem: - pull_file(nhd_raster_download_url, nhd_raster_extraction_path) - os.system("7za e {nhd_raster_extraction_path} -o{nhd_raster_parent_dir} elev_cm.tif -r ".format(nhd_raster_extraction_path=nhd_raster_extraction_path, nhd_raster_parent_dir=nhd_raster_parent_dir)) - - file_list = os.listdir(nhd_raster_parent_dir) - for f in file_list: - full_path = os.path.join(nhd_raster_parent_dir, f) - if 'elev_cm' not in f: - if os.path.isdir(full_path): - shutil.rmtree(full_path) - elif os.path.isfile(full_path): - os.remove(full_path) - os.remove(nhd_raster_extraction_path) - - nhd_vector_extraction_parent = os.path.dirname(nhd_vector_extraction_path) - - if not os.path.exists(nhd_vector_extraction_parent): - os.mkdir(nhd_vector_extraction_parent) - - if not os.path.exists(nhd_gdb) or overwrite_nhd_gdb: # Only pull if not already pulled and processed. - # Download and fully unzip downloaded GDB. - pull_file(nhd_vector_download_url, nhd_vector_extraction_path) - huc = os.path.split(nhd_vector_extraction_parent)[1] # Parse HUC. - os.system("7za x {nhd_vector_extraction_path} -o{nhd_vector_extraction_parent}".format(nhd_vector_extraction_path=nhd_vector_extraction_path, nhd_vector_extraction_parent=nhd_vector_extraction_parent)) - # extract input stream network - nhd = gpd.read_file(nhd_gdb,layer='NHDPlusBurnLineEvent') - nhd = nhd.to_crs(PREP_PROJECTION) - nhd.to_file(os.path.join(nhd_vector_extraction_parent, 'NHDPlusBurnLineEvent' + huc + '.gpkg'),driver='GPKG') - # extract flowlines for FType attributes - nhd = gpd.read_file(nhd_gdb,layer='NHDFlowline') - nhd = nhd.to_crs(PREP_PROJECTION) - nhd.to_file(os.path.join(nhd_vector_extraction_parent, 'NHDFlowline' + huc + '.gpkg'),driver='GPKG') - # extract attributes - nhd = gpd.read_file(nhd_gdb,layer='NHDPlusFlowLineVAA') - nhd.to_file(os.path.join(nhd_vector_extraction_parent, 'NHDPlusFlowLineVAA' + huc + '.gpkg'),driver='GPKG') - # -- Project and convert NHDPlusBurnLineEvent and NHDPlusFlowLineVAA vectors to geopackage -- # - #for nhd_layer in ['NHDPlusBurnLineEvent', 'NHDPlusFlowlineVAA']: - # run_system_command(['ogr2ogr -overwrite -progress -f GPKG -t_srs "{projection}" {output_gpkg} {nhd_gdb} {nhd_layer}'.format(projection=PREP_PROJECTION, output_gpkg=output_gpkg, nhd_gdb=nhd_gdb, nhd_layer=nhd_layer)]) # Use list because function is configured for multiprocessing. - # Delete unnecessary files. - delete_file(nhd_vector_extraction_path.replace('.zip', '.jpg')) - delete_file(nhd_vector_extraction_path) # Delete the zipped GDB. - - -def build_huc_list_files(path_to_saved_data_parent_dir, wbd_directory): - """ - This function builds a list of available HUC4s, HUC6s, and HUC8s and saves the lists to .lst files. - - Args: - path_to_saved_data_parent_dir (str): The path to the parent directory where the .lst files will be saved. - wbd_directory (str): The path to the directory storing the WBD geopackages which are used to determine which HUCs are available for processing. 
- - """ - - print("Building included HUC lists...") - # Identify all saved NHDPlus Vectors. - nhd_plus_raster_dir = os.path.join(path_to_saved_data_parent_dir, NHDPLUS_RASTERS_DIRNAME) - nhd_plus_vector_dir = os.path.join(path_to_saved_data_parent_dir, NHDPLUS_VECTORS_DIRNAME) - - huc4_list = [i[-4:] for i in os.listdir(nhd_plus_raster_dir)] - huc6_list, huc8_list = [], [] - - # Read WBD into dataframe. - full_huc_gpkg = os.path.join(wbd_directory, 'WBD_National.gpkg') - huc_gpkg = 'WBDHU8' # The WBDHU4 are handled by the nhd_plus_raster_dir name. - - # Open geopackage. - wbd = gpd.read_file(full_huc_gpkg, layer=huc_gpkg) - - # Loop through entries and compare against the huc4_list to get available HUCs within the geopackage domain. - for index, row in tqdm(wbd.iterrows(),total=len(wbd)): - huc = row["HUC" + huc_gpkg[-1]] - huc_mask = wbd.loc[wbd[str("HUC" + huc_gpkg[-1])]==huc].geometry - burnline = os.path.join(nhd_plus_vector_dir, huc[0:4], 'NHDPlusBurnLineEvent' + huc[0:4] + '.gpkg') - if os.path.exists(burnline): - nhd_test = len(gpd.read_file(burnline, mask = huc_mask)) # this is slow, iterates through 2000+ HUC8s - # Append huc to huc8 list. - if (str(huc[:4]) in huc4_list) & (nhd_test>0): - huc8_list.append(huc) - - huc6_list = [w[:6] for w in huc8_list] - huc6_list = set(huc6_list) - - # Write huc lists to appropriate .lst files. - huc_lists_dir = os.path.join(path_to_saved_data_parent_dir, 'huc_lists') - if not os.path.exists(huc_lists_dir): - os.mkdir(huc_lists_dir) - included_huc4_file = os.path.join(huc_lists_dir, 'included_huc4.lst') - included_huc6_file = os.path.join(huc_lists_dir, 'included_huc6.lst') - included_huc8_file = os.path.join(huc_lists_dir, 'included_huc8.lst') - - # Overly verbose file writing loops. Doing this in a pinch. - with open(included_huc4_file, 'w') as f: - for item in huc4_list: - f.write("%s\n" % item) - - with open(included_huc6_file, 'w') as f: - for item in huc6_list: - f.write("%s\n" % item) - - with open(included_huc8_file, 'w') as f: - for item in huc8_list: - f.write("%s\n" % item) - - -def manage_preprocessing( hucs_of_interest, - num_workers=1, - overwrite_nhd_dem=False, - overwrite_nhd_gdb=False, - overwrite_wbd=False - ): - """ - This functions manages the downloading and preprocessing of gridded and vector data for FIM production. - - Args: - hucs_of_interest (str): Path to a user-supplied config file of hydrologic unit codes to be pulled and post-processed. - - """ - - #get input data dir - path_to_saved_data_parent_dir = os.environ['inputDataDir'] - - nhd_procs_list = [] # Initialize procs_list for multiprocessing. - - # Create the parent directory if nonexistent. - if not os.path.exists(path_to_saved_data_parent_dir): - os.mkdir(path_to_saved_data_parent_dir) - - # Create NHDPlus raster parent directory if nonexistent. - nhd_raster_dir = os.path.join(path_to_saved_data_parent_dir, NHDPLUS_RASTERS_DIRNAME) - if not os.path.exists(nhd_raster_dir): - os.mkdir(nhd_raster_dir) - - # Create the vector data parent directory if nonexistent. - vector_data_dir = os.path.join(path_to_saved_data_parent_dir, NHDPLUS_VECTORS_DIRNAME) - if not os.path.exists(vector_data_dir): - os.mkdir(vector_data_dir) - - # Parse HUCs from hucs_of_interest. - if isinstance(hucs_of_interest,list): - if len(hucs_of_interest) == 1: - try: - with open(hucs_of_interest[0]) as csv_file: # Does not have to be CSV format. 
- huc_list = [i[0] for i in csv.reader(csv_file)] - except FileNotFoundError: - huc_list = hucs_of_interest - else: - huc_list = hucs_of_interest - elif isinstance(hucs_of_interest,str): - try: - with open(hucs_of_interest) as csv_file: # Does not have to be CSV format. - huc_list = [i[0] for i in csv.reader(csv_file)] - except FileNotFoundError: - huc_list = list(hucs_of_interest) - - # get unique huc4s - huc_list = [h[0:4] for h in huc_list] - huc_list = list( set(huc_list) ) - - # Construct paths to data to download and append to procs_list for multiprocessed pull, project, and converstion to geopackage. - for huc in huc_list: - huc = str(huc) # Ensure huc is string. - - # Construct URL and extraction path for NHDPlus raster. - #nhd_raster_download_url = os.path.join(NHD_URL_PARENT, NHD_URL_PREFIX + huc + NHD_RASTER_URL_SUFFIX) - nhd_raster_download_url = nhd_raster_url_template.format(huc) - nhd_raster_extraction_path = os.path.join(nhd_raster_dir, NHD_URL_PREFIX + huc + NHD_RASTER_URL_SUFFIX) - - # Construct URL and extraction path for NHDPlus vector. Organize into huc-level subdirectories. - #nhd_vector_download_url = os.path.join(NHD_URL_PARENT, NHD_URL_PREFIX + huc + NHD_VECTOR_URL_SUFFIX) - nhd_vector_download_url = nhd_vector_url_template.format(huc) - nhd_vector_download_parent = os.path.join(vector_data_dir, huc) - if not os.path.exists(nhd_vector_download_parent): - os.mkdir(nhd_vector_download_parent) - nhd_vector_extraction_path = os.path.join(nhd_vector_download_parent, NHD_VECTOR_EXTRACTION_PREFIX + huc + NHD_VECTOR_EXTRACTION_SUFFIX) - - # Append extraction instructions to nhd_procs_list. - nhd_procs_list.append([nhd_raster_download_url, nhd_raster_extraction_path, nhd_vector_download_url, nhd_vector_extraction_path, overwrite_nhd_dem, overwrite_nhd_gdb]) - - # Pull and prepare NHD data. - # with Pool(processes=num_workers) as pool: - # pool.map(pull_and_prepare_nhd_data, nhd_procs_list) - - for huc in nhd_procs_list: - try: - pull_and_prepare_nhd_data(*huc) - except HTTPError: - print("404 error for HUC4 {}".format(huc)) - - # Pull and prepare NWM data. - #pull_and_prepare_nwm_hydrofabric(path_to_saved_data_parent_dir, path_to_preinputs_dir,num_workers) # Commented out for now. - - # Pull and prepare WBD data. - wbd_directory = pull_and_prepare_wbd(path_to_saved_data_parent_dir,NWM_HYDROFABRIC_DIRNAME,NWM_FILE_TO_SUBSET_WITH,overwrite_wbd,num_workers) - - # Create HUC list files. - build_huc_list_files(path_to_saved_data_parent_dir, wbd_directory) - - -if __name__ == '__main__': - - # Parse arguments. - parser = argparse.ArgumentParser(description='Acquires and preprocesses WBD and NHD data for use in fim_run.sh.') - parser.add_argument('-u','--hucs-of-interest',help='HUC4, series of HUC4s, or path to a line-delimited file of HUC4s to acquire.',required=True,nargs='+') - #parser.add_argument('-j','--num-workers',help='Number of workers to process with',required=False,default=1,type=int) - parser.add_argument('-nd', '--overwrite-nhd-dem', help='Optional flag to overwrite NHDPlus DEM Data',required=False,action='store_true',default=False) - parser.add_argument('-ng', '--overwrite-nhd-gdb', help='Optional flag to overwrite NHDPlus GDB Data',required=False,action='store_true',default=False) - parser.add_argument('-w', '--overwrite-wbd', help='Optional flag to overwrite WBD Data',required=False,action='store_true') - - # Extract to dictionary and assign to variables. 
- args = vars(parser.parse_args()) - - manage_preprocessing(**args) diff --git a/data/nws/preprocess_ahps_nws.py b/data/nws/preprocess_ahps_nws.py index 4659e3dc8..9bba37573 100644 --- a/data/nws/preprocess_ahps_nws.py +++ b/data/nws/preprocess_ahps_nws.py @@ -14,6 +14,15 @@ from tools_shared_functions import mainstem_nwm_segs, get_metadata, aggregate_wbd_hucs, get_thresholds, get_datum, ngvd_to_navd_ft, get_rating_curve, select_grids, get_nwm_segs, flow_data, process_extent, process_grid, raster_to_feature +######################################################## +''' +Feb 15, 2023 - This file may be deprecated. At a minimum, it needs + a significant review and/or upgrade. +''' + +######################################################## + + def get_env_paths(): load_dotenv() #import variables from .env file @@ -354,6 +363,15 @@ def preprocess_nws(source_dir, destination, reference_raster): if __name__ == '__main__': + + ######################################################## + ''' + Feb 15, 2023 - This file may be deprecated. At a minimum, it needs + a significant review and/or upgrade. + ''' + + ######################################################## + #Parse arguments parser = argparse.ArgumentParser(description = 'Create preprocessed USGS benchmark datasets at AHPS locations.') parser.add_argument('-s', '--source_dir', help = 'Workspace where all source data is located.', required = True) diff --git a/data/usgs/acquire_and_preprocess_3dep_dems.py b/data/usgs/acquire_and_preprocess_3dep_dems.py index 828c9cdef..1323c8572 100644 --- a/data/usgs/acquire_and_preprocess_3dep_dems.py +++ b/data/usgs/acquire_and_preprocess_3dep_dems.py @@ -174,7 +174,7 @@ def __download_usgs_dems(extent_files, output_folder_path, number_of_jobs, retry sys.exit(1) - # Send the executor to the progress bar and wait for all FR tasks to finish + # Send the executor to the progress bar and wait for all tasks to finish sf.progress_bar_handler(executor_dict, f"Downloading USGG 3Dep Dems") print(f"-- Downloading USGS DEMs Completed") diff --git a/fim_post_processing.sh b/fim_post_processing.sh index c105eacca..b30202cf9 100755 --- a/fim_post_processing.sh +++ b/fim_post_processing.sh @@ -69,7 +69,7 @@ rm -f $outputRunDataDir/logs/subdiv_src_.log # load up enviromental information args_file=$outputRunDataDir/runtime_args.env -gms_inputs=$outputRunDataDir/gms_inputs.csv +fim_inputs=$outputRunDataDir/fim_inputs.csv source $args_file source $outputRunDataDir/params.env @@ -87,7 +87,7 @@ post_proc_start_time=`date +%s` ## AGGREGATE BRANCH LISTS INTO ONE ## echo -e $startDiv"Start branch aggregation" -python3 $srcDir/aggregate_branch_lists.py -d $outputRunDataDir -f "branch_ids.csv" -o $gms_inputs +python3 $srcDir/aggregate_branch_lists.py -d $outputRunDataDir -f "branch_ids.csv" -o $fim_inputs ## GET NON ZERO EXIT CODES FOR UNITS ## ## No longer applicable @@ -96,17 +96,9 @@ python3 $srcDir/aggregate_branch_lists.py -d $outputRunDataDir -f "branch_ids.cs echo -e $startDiv"Start non-zero exit code checking" find $outputRunDataDir/logs/branch -name "*_branch_*.log" -type f | xargs grep -E "Exit status: ([1-9][0-9]{0,2})" > "$outputRunDataDir/branch_errors/non_zero_exit_codes.log" & - -## REMOVE FAILED BRANCHES ## -# Needed in case aggregation fails, we will need the logs -#echo -#echo -e $startDiv"Removing branches that failed with Exit status: 61" -#Tstart -#python3 $srcDir/gms/remove_error_branches.py -f "$outputRunDataDir/branch_errors/non_zero_exit_codes.log" -g #$gms_inputs - ## RUN AGGREGATE BRANCH ELEV TABLES ## 
echo "Processing usgs gage aggregation" -python3 $srcDir/usgs_gage_aggregate.py -fim $outputRunDataDir -gms $gms_inputs +python3 $srcDir/usgs_gage_aggregate.py -fim $outputRunDataDir -i $fim_inputs ## RUN SYNTHETIC RATING CURVE BANKFULL ESTIMATION ROUTINE ## if [ "$src_bankfull_toggle" = "True" ]; then @@ -191,7 +183,7 @@ echo echo -e $startDiv"Combining crosswalk tables" # aggregate outputs Tstart -python3 /foss_fim/tools/gms_tools/combine_crosswalk_tables.py -d $outputRunDataDir -o $outputRunDataDir/crosswalk_table.csv +python3 /foss_fim/tools/combine_crosswalk_tables.py -d $outputRunDataDir -o $outputRunDataDir/crosswalk_table.csv Tcount date -u diff --git a/fim_pre_processing.sh b/fim_pre_processing.sh index 260823699..1a5b409c4 100755 --- a/fim_pre_processing.sh +++ b/fim_pre_processing.sh @@ -26,22 +26,22 @@ usage () echo ' Default (if arg not added) : /foss_fim/config/params_template.env' echo ' -ud/--unitDenylist' echo ' A file with a line delimited list of files in UNIT (HUC) directories to be removed' - echo ' upon completion (see config/deny_gms_unit.lst for a starting point)' - echo ' Default (if arg not added) : /foss_fim/config/deny_gms_unit.lst' + echo ' upon completion (see config/deny_unit.lst for a starting point)' + echo ' Default (if arg not added) : /foss_fim/config/deny_unit.lst' echo ' -- Note: if you want to keep all output files (aka.. no files removed),' echo ' use the word NONE as this value for this parameter.' echo ' -bd/--branchDenylist' echo ' A file with a line delimited list of files in BRANCHES directories to be removed' echo ' upon completion of branch processing.' - echo ' (see config/deny_gms_branches.lst for a starting point)' - echo ' Default: /foss_fim/config/deny_gms_branches.lst' + echo ' (see config/deny_branches.lst for a starting point)' + echo ' Default: /foss_fim/config/deny_branches.lst' echo ' -- Note: if you want to keep all output files (aka.. no files removed),' echo ' use the word NONE as this value for this parameter.' echo ' -zd/--branchZeroDenylist' echo ' A file with a line delimited list of files in BRANCH ZERO directories to' echo ' be removed upon completion of branch zero processing.' - echo ' (see config/deny_gms_branch_zero.lst for a starting point)' - echo ' Default: /foss_fim/config/deny_gms_branch_zero.lst' + echo ' (see config/deny_branch_zero.lst for a starting point)' + echo ' Default: /foss_fim/config/deny_branch_zero.lst' echo ' -- Note: if you want to keep all output files (aka.. no files removed),' echo ' use the word NONE as this value for this parameter.' echo ' -jh/--jobLimit : Max number of concurrent HUC jobs to run. Default 1 job at time.' @@ -141,7 +141,7 @@ if [ -z "$isAWS" ]; then isAWS=0; fi # validate and set defaults for the deny lists if [ "$deny_unit_list" = "" ] then - deny_unit_list=$projectDir/config/deny_gms_unit.lst + deny_unit_list=$projectDir/config/deny_unit.lst elif [ "${deny_unit_list^^}" != "NONE" ] && [ ! -f "$deny_unit_list" ] then # NONE is not case sensitive @@ -152,7 +152,7 @@ fi # validate and set defaults for the deny lists if [ "$deny_branches_list" = "" ] then - deny_branches_list=$projectDir/config/deny_gms_branches.lst + deny_branches_list=$projectDir/config/deny_branches.lst elif [ "${deny_branches_list^^}" != "NONE" ] && [ ! 
-f "$deny_branches_list" ] then # NONE is not case sensitive @@ -167,7 +167,7 @@ fi has_deny_branch_zero_override=0 if [ "$deny_branch_zero_list" = "" ] then - deny_branch_zero_list=$projectDir/config/deny_gms_branch_zero.lst + deny_branch_zero_list=$projectDir/config/deny_branch_zero.lst elif [ "${deny_branch_zero_list^^}" != "NONE" ] # NONE is not case sensitive then if [ ! -f "$deny_branch_zero_list" ] @@ -211,7 +211,7 @@ else rm -rdf $outputRunDataDir/unit_errors rm -rdf $outputRunDataDir/eval rm -f $outputRunDataDir/crosswalk_table.csv - rm -f $outputRunDataDir/gms_inputs* + rm -f $outputRunDataDir/fim_inputs* rm -f $outputRunDataDir/*.env fi diff --git a/gms_pipeline.sh b/gms_pipeline.sh deleted file mode 100755 index 26c7694f1..000000000 --- a/gms_pipeline.sh +++ /dev/null @@ -1,208 +0,0 @@ -#!/bin/bash -e -: -usage () -{ - echo - echo 'Produce GMS hydrofabric datasets for unit and branch scale.' - echo 'Usage : gms_pipeline.sh [REQ: -u -n ]' - echo ' [OPT: -h -c -j ] -o' - echo ' -ud ' - echo ' -bd ' - echo ' -zd ]' - echo '' - echo 'REQUIRED:' - echo ' -u/--hucList : HUC8s to run or multiple passed in quotes (space delimited) file.' - echo ' A line delimited file is also acceptable. HUCs must present in inputs directory.' - echo ' -n/--runName : a name to tag the output directories and log files as. could be a version tag.' - echo - echo 'OPTIONS:' - echo ' -h/--help : help file' - echo ' -c/--config : configuration file with bash environment variables to export' - echo ' Default (if arg not added) : /foss_fim/config/params_template.env' - echo ' -ud/--unitDenylist : A file with a line delimited list of files in UNIT (HUC) directories to be removed' - echo ' upon completion (see config/deny_gms_unit.lst for a starting point)' - echo ' Default (if arg not added) : /foss_fim/config/deny_gms_unit.lst' - echo ' -- Note: if you want to keep all output files (aka.. no files removed),' - echo ' use the word NONE as this value for this parameter.' - echo ' -bd/--branchDenylist : A file with a line delimited list of files in BRANCHES directories to be removed' - echo ' upon completion of branch processing.' - echo ' (see config/deny_gms_branches.lst for a starting point)' - echo ' Default: /foss_fim/config/deny_gms_branches.lst' - echo ' -- Note: if you want to keep all output files (aka.. no files removed),' - echo ' use the word NONE as this value for this parameter.' - echo ' -zd/--branchZeroDenylist : A file with a line delimited list of files in BRANCH ZERO directories to' - echo ' be removed upon completion of branch zero processing.' - echo ' (see config/deny_gms_branch_zero.lst for a starting point)' - echo ' Default: /foss_fim/config/deny_gms_branch_zero.lst' - echo ' -- Note: if you want to keep all output files (aka.. no files removed),' - echo ' use the word NONE as this value for this parameter.' - echo ' -j/--jobLimit : max number of concurrent jobs to run. Default 1 job at time.' - echo ' stdout and stderr to terminal and logs. 
With >1 outputs progress and logs the rest' - echo ' -o/--overwrite : overwrite outputs if already exist' - echo - exit -} - -set -e - -while [ "$1" != "" ]; do -case $1 -in - -u|--hucList) - shift - hucList=$1 - ;; - -c|--configFile ) - shift - envFile=$1 - ;; - -n|--runName) - shift - runName=$1 - ;; - -j|--jobLimit) - shift - jobLimit=$1 - ;; - -h|--help) - shift - usage - ;; - -o|--overwrite) - overwrite=1 - ;; - -ud|--unitDenylist) - shift - deny_unit_list=$1 - ;; - -bd|--branchDenylist) - shift - deny_branches_list=$1 - ;; - -zd|--branchZeroDenylist) - shift - deny_branch_zero_list=$1 - ;; - *) ;; - esac - shift -done - -# print usage if arguments empty -if [ "$hucList" = "" ] -then - echo "ERROR: Missing -u Huclist argument" - usage -fi -if [ "$runName" = "" ] -then - echo "ERROR: Missing -n run time name argument" - usage -fi - -if [ "$envFile" = "" ] -then - envFile=/foss_fim/config/params_template.env -fi - -if [ -z "$overwrite" ] -then - # default is false (0) - overwrite=0 -fi - -# The tests for the deny lists are duplicated here on to help catch -# them earlier (ie.. don't have to wait to process units to find an -# pathing error with the branch deny list) -if [ "$deny_unit_list" != "" ] && \ - [ "${deny_unit_list^^}" != "NONE" ] && \ - [ ! -f "$deny_unit_list" ] -then - # NONE is not case sensitive - echo "Error: The -ud does not exist and is not the word NONE" - usage -fi - -if [ "$deny_branches_list" != "" ] && \ - [ "${deny_branches_list^^}" != "NONE" ] && \ - [ ! -f "$deny_branches_list" ] -then - # NONE is not case sensitive - echo "Error: The -bd does not exist and is not the word NONE" - usage -fi - -if [ "$deny_branch_zero_list" != "" ] && \ - [ "${deny_branch_zero_list^^}" != "NONE" ] && \ - [ ! -f "$deny_branch_zero_list" ] -then - echo "Error: The -zd does not exist and is not the word NONE" - usage -fi - -## SOURCE ENV FILE AND FUNCTIONS ## -source $envFile -source $srcDir/bash_functions.env - -# default values -if [ "$jobLimit" = "" ] ; then - jobLimit=1 -fi - -export outputRunDataDir=$outputDataDir/$runName - -if [ -d $outputRunDataDir ] && [ $overwrite -eq 0 ]; then - echo - echo "ERROR: Output dir $outputRunDataDir exists. Use overwrite -o to run." - echo - usage -fi - -pipeline_start_time=`date +%s` - -num_hucs=$(python3 $srcDir/check_huc_inputs.py -u $hucList) - -echo -echo "======================= Start of gms_pipeline.sh =========================" -echo "Number of HUCs to process is $num_hucs" - -## Produce gms hydrofabric at unit level first (gms_run_unit) - -# We have to build this as a string as some args are optional. -# but the huclist doesn't always pass well, so just worry about -# the rest of the params. -run_cmd=" -n $runName" -run_cmd+=" -c $envFile" -run_cmd+=" -j $jobLimit" - -if [ $overwrite -eq 1 ]; then run_cmd+=" -o" ; fi - -#echo "$run_cmd" -. /foss_fim/gms_run_unit.sh -u "$hucList" $run_cmd -ud "$deny_unit_list" -zd "$deny_branch_zero_list" - -## CHECK IF OK TO CONTINUE ON TO BRANCH STEPS -# Count the number of files in the $outputRunDataDir/unit_errors -# If no errors, there will be only one file, non_zero_exit_codes.log. -# Calculate the number of error files as a percent of the number of hucs -# originally submitted. If the percent error is over "x" threshold stop processing -# Note: This applys only if there are a min number of hucs. Aka.. 
if min threshold -# is set to 10, then only return a sys.exit of > 1, if there is at least 10 errors - -# if this has too many errors, it will return a sys.exit code (like 62 as per fim_enums) -# and we will stop the rest of the process. We have to catch stnerr as well. -# This stops the run from continuing to run, drastically filing drive and killing disk space. -python3 $srcDir/check_unit_errors.py -f $outputRunDataDir -n $num_hucs - -## Produce level path or branch level datasets -. /foss_fim/gms_run_branch.sh $run_cmd -bd "$deny_branches_list" -zd "$deny_branch_zero_list" - - -## continue on to post processing -. /foss_fim/gms_run_post_processing.sh $run_cmd - -echo -echo "======================== End of gms_pipeline.sh ==========================" -date -u -Calc_Duration $pipeline_start_time -echo - diff --git a/gms_run_branch.sh b/gms_run_branch.sh deleted file mode 100755 index 9c38437d8..000000000 --- a/gms_run_branch.sh +++ /dev/null @@ -1,244 +0,0 @@ -#!/bin/bash -e -: -usage () -{ - echo 'Produce GMS hydrofabric at levelpath/branch scale. Execute gms_run_unit.sh prior to.' - echo 'Usage : gms_run_branch.sh [REQ: -n ]' - echo ' [OPT: -h -u -c -j ] -o' - echo ' -bd ' - echo ' -zd ]' - echo '' - echo 'REQUIRED:' - echo ' -n/--runName : A name to tag the output directories and log files as. could be a version tag.' - echo '' - echo 'OPTIONS:' - echo ' -h/--help : help file' - echo ' -u/--hucList : HUC8s to run or multiple passed in quotes (space delimited).' - echo ' A line delimited file also acceptable. HUCs must present in inputs directory.' - echo ' -c/--config : configuration file with bash environment variables to export' - echo ' default (if arg not added) : /foss_fim/config/params_template.env' - echo ' -bd/--branchDenylist : A file with a line delimited list of files in BRANCHES directories to be removed' - echo ' upon completion of branch processing.' - echo ' (see config/deny_gms_branches.lst for a starting point)' - echo ' Default: /foss_fim/config/deny_gms_branches.lst' - echo ' -- Note: if you want to keep all output files (aka.. no files removed),' - echo ' use the word NONE as this value for this parameter.' - echo ' -zd/--branchZeroDenylist : A file with a line delimited list of files in BRANCH ZERO directories to' - echo ' be removed upon completion of branch zero processing.' - echo ' (see config/deny_gms_branch_zero.lst for a starting point)' - echo ' Default: /foss_fim/config/deny_gms_branch_zero.lst' - echo ' -- Note: if you want to keep all output files (aka.. no files removed),' - echo ' use the word NONE as this value for this parameter.' - echo ' -j/--jobLimit : max number of concurrent jobs to run. Default 1 job at time. 1 outputs' - echo ' stdout and stderr to terminal and logs. 
With >1 outputs progress and logs the rest' - echo ' -o/--overwrite : overwrite outputs if already exist' - echo - exit -} - -while [ "$1" != "" ]; do -case $1 -in - -c|--configFile ) - shift - envFile=$1 - ;; - -n|--runName) - shift - runName=$1 - ;; - -j|--jobLimit) - shift - jobLimit=$1 - ;; - -u|--hucList) - shift - hucList=$1 - ;; - -h|--help) - shift - usage - ;; - -o|--overwrite) - overwrite=1 - ;; - -bd|--branchDenylist) - shift - deny_branches_list=$1 - ;; - -zd|--branchZeroDenylist) - shift - deny_branch_zero_list_for_branches=$1 - ;; - *) ;; - esac - shift -done - -# print usage if arguments empty -if [ "$runName" = "" ] -then - echo "ERROR: Missing -n run time name argument" - usage -fi - -if [ "$envFile" = "" ] -then - envFile=/foss_fim/config/params_template.env -fi - -if [ "$deny_branches_list" = "" ] -then - deny_branches_list=/foss_fim/config/deny_gms_branches.lst -elif [ "${deny_branches_list^^}" != "NONE" ] && [ ! -f "$deny_branches_list" ] -then - # NONE is not case sensitive - echo "Error: The -bd does not exist and is not the word NONE" - usage -fi - -# Yes.. we have to have a different variable names for the deny_branch_zero_list_for_branches -# and deny_branch_zero_list_for_units. While they both use the same input arg, they use -# the value slightly different and when using gms_pipeline, the values can impact -# other bash files. -if [ "$deny_branch_zero_list_for_branches" = "" ] -then - deny_branch_zero_list_for_branches=/foss_fim/config/deny_gms_branch_zero.lst -elif [ "${deny_branch_zero_list_for_branches^^}" != "NONE" ] # NONE is not case sensitive -then - if [ ! -f "$deny_branch_zero_list_for_branches" ] - then - echo "Error: The -zd does not exist and is not the word NONE" - usage - else - # only if the deny branch zero has been overwritten and file exists - has_deny_branch_zero_override=1 - fi -else - has_deny_branch_zero_override=1 # it is the value of NONE and is overridden -fi - -if [ "$overwrite" = "" ] -then - overwrite=0 -fi - -## SOURCE ENV FILE AND FUNCTIONS ## -source $envFile -source $srcDir/bash_functions.env -source $srcDir/bash_variables.env - -# default values -if [ "$jobLimit" = "" ] ; then - jobLimit=1 -fi - -## Define Outputs Data Dir & Log File## -export outputRunDataDir=$outputDataDir/$runName -export deny_branches_list=$deny_branches_list -logFile=$outputRunDataDir/logs/branch/summary_gms_branch.log -export overwrite=$overwrite - -## Check for run data directory and the file. If gms_run_unit failed, the file will not be there ## -if [ ! -f "$outputRunDataDir/gms_inputs.csv" ]; then - echo "Depends on output from gms_run_unit.sh. Please produce data with gms_run_unit.sh first." - exit 1 -fi - -## Filter out hucs ## -if [ "$hucList" = "" ]; then - gms_inputs=$outputRunDataDir/gms_inputs.csv -else - $srcDir/gms/filter_gms_inputs_by_huc.py -g $outputRunDataDir/gms_inputs.csv -u $hucList -o $outputRunDataDir/gms_inputs_filtered.csv - gms_inputs=$outputRunDataDir/gms_inputs_filtered.csv -fi - -# make log dir -if [ ! -d "$outputRunDataDir/logs/branch" ]; then - mkdir -p $outputRunDataDir/logs/branch -elif [ $overwrite -eq 1 ]; then - # need to clean it out if we are overwriting - rm -rdf $outputRunDataDir/logs/branch - mkdir -p $outputRunDataDir/logs/branch -fi - -# Note: Other parts of the program will check for the existance of the file -# /branch_errors/non_zero_exit_codes.log. It has to be removed no matter -# what on each run of gms_run_branch -if [ ! 
-d "$outputRunDataDir/branch_errors" ]; then - mkdir -p "$outputRunDataDir/branch_errors" -elif [ $overwrite -eq 1 ]; then - rm -rdf $outputRunDataDir/branch_errors - mkdir -p $outputRunDataDir/branch_errors -fi - -## RUN GMS BY BRANCH ## -echo -echo "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++" -echo "---- Start of branch processing" -echo "---- Started: `date -u`" -T_total_start -Tstart -all_branches_start_time=`date +%s` - -if [ "$jobLimit" -eq 1 ]; then - parallel --verbose --timeout $branch_timeout --lb -j $jobLimit --joblog $logFile --colsep ',' -- $srcDir/gms/time_and_tee_run_by_branch.sh :::: $gms_inputs -else - parallel --eta --timeout $branch_timeout -j $jobLimit --joblog $logFile --colsep ',' -- $srcDir/gms/time_and_tee_run_by_branch.sh :::: $gms_inputs -fi - -echo "Branch processing is complete" -Tcount -date -u - -# ------------------- -## REMOVE FILES FROM DENY LIST FOR BRANCH ZERO (but using normal branch deny) ## -## but also do not remove if branch zero deny is NONE (any case) - -# If the deny branch zero has been overridden, then use it (file path proven above). -# Override might be the value of None (case not sensitive) -# Else then use the default deny branch (not the zero) which might also be None and that is ok - -# If deny branch zero is NONE.. then skip -# if deny branch zero has an override deny list, use it. -# if deny branch zero is not NONE and deny list is not overridden -# then see if reg deny branch is NONE. If so.. skip -# else, use the deny branch list instead to do final cleanup on branch zero - -if [ "$has_deny_branch_zero_override" == "1" ] -then - echo -e $startDiv"Cleaning up (Removing) files for branch zero for all HUCs" - $srcDir/gms/outputs_cleanup.py -d $outputRunDataDir -l $deny_branch_zero_list_for_branches -b 0 - -else - echo -e $startDiv"Cleaning up (Removing) files all branch zero for all HUCs using the default branch deny list" - $srcDir/gms/outputs_cleanup.py -d $outputRunDataDir -l $deny_branches_list -b 0 -fi - - -# ------------------- -## GET NON ZERO EXIT CODES ## -# Needed in case aggregation fails, we will need the logs -# Note: Other parts of the program (gms_run_post_processing.sh) check to see -# if the branch_errors/non_zero_exit_codes.log exists. If it does not, it assumes -# that gms_run_branch did not complete (or was not run) -echo -echo -e $startDiv"Start non-zero exit code checking"$stopDiv -find $outputRunDataDir/logs/branch -name "*_branch_*.log" -type f | xargs grep -E "Exit status: ([1-9][0-9]{0,2})" >"$outputRunDataDir/branch_errors/non_zero_exit_codes.log" & - -# ------------------- -## REMOVE FAILED BRANCHES ## -# Needed in case aggregation fails, we will need the logs -echo -echo -e $startDiv"Removing branches that failed with Exit status: 61"$stopDiv -Tstart -python3 $srcDir/gms/remove_error_branches.py -f "$outputRunDataDir/branch_errors/non_zero_exit_codes.log" -g $outputRunDataDir/gms_inputs.csv -Tcount -date -u - -echo -echo "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++" -echo "---- gms_run_branch complete" -echo "---- Ended: `date -u`" -Calc_Duration $all_branches_start_time -echo diff --git a/gms_run_post_processing.sh b/gms_run_post_processing.sh deleted file mode 100755 index e83404dca..000000000 --- a/gms_run_post_processing.sh +++ /dev/null @@ -1,184 +0,0 @@ -#!/bin/bash -e -: -usage () -{ - echo 'Produce GMS hydrofabric at levelpath/branch scale. Execute gms_run_unit.sh and gms_run_branch prior to.' 
- echo 'Usage : gms_run_post_processing.sh [REQ: -n ]' - echo ' [OPT: -h -c -j ]' - echo '' - echo 'REQUIRED:' - echo ' -n/--runName : A name to tag the output directories and log files as. could be a version tag.' - echo '' - echo 'OPTIONS:' - echo ' -h/--help : help file' - echo ' -c/--config : configuration file with bash environment variables to export' - echo ' default (if arg not added) : /foss_fim/config/params_template.env' - echo ' -j/--jobLimit : max number of concurrent jobs to run. Default 1 job at time. 1 outputs' - echo ' stdout and stderr to terminal and logs. With >1 outputs progress and logs the rest' - echo - exit -} - -while [ "$1" != "" ]; do -case $1 -in - -c|--configFile ) - shift - envFile=$1 - ;; - -n|--runName) - shift - runName=$1 - ;; - -j|--jobLimit) - shift - jobLimit=$1 - ;; - -h|--help) - shift - usage - ;; - *) ;; - esac - shift -done - -# print usage if arguments empty -if [ "$runName" = "" ] -then - echo "ERROR: Missing -n run time name argument" - usage -fi - -if [ "$envFile" = "" ] -then - envFile=/foss_fim/config/params_template.env -fi - -## SOURCE ENV FILE AND FUNCTIONS ## -source $envFile -source $srcDir/bash_functions.env -source $srcDir/bash_variables.env - -# default values -if [ "$jobLimit" = "" ] ; then - jobLimit=1 -fi - -## Define Outputs Data Dir & Log File## -export outputRunDataDir=$outputDataDir/$runName -export extent=GMS - -## Check for run data directory ## -if [ ! -d "$outputRunDataDir" ]; then - echo "Depends on output from gms_run_unit.sh. Please produce data with gms_run_unit.sh first." - exit 1 -fi - -## Check to ensure gms_run_branch completed ## -if [ ! -f "$outputRunDataDir/branch_errors/non_zero_exit_codes.log" ]; then - echo "Depends on output from gms_run_branch.sh. Please run gms_run_branch.sh or check if it failed." - exit 1 -fi - -# Clean out the other post processing files before starting -rm -rdf $outputRunDataDir/logs/src_optimization -rm -f $outputRunDataDir/logs/log_bankfull_indentify.log -rm -f $outputRunDataDir/logs/subdiv_src_.log - -gms_inputs=$outputRunDataDir/gms_inputs.csv - -echo -echo "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++" -echo "---- Start of gms_run_post_processing" -echo "---- Started: `date -u`" -T_total_start -post_proc_start_time=`date +%s` - -## RUN AGGREGATE BRANCH ELEV TABLES ## -# TODO: How do we skip aggregation if there is a branch error -# maybe against the non_zero logs above -echo -echo "Processing usgs gage aggregation" -python3 $srcDir/usgs_gage_aggregate.py -fim $outputRunDataDir -gms $gms_inputs - -## RUN SYNTHETIC RATING CURVE BANKFULL ESTIMATION ROUTINE ## -if [ "$src_bankfull_toggle" = "True" ]; then - echo -e $startDiv"Estimating bankfull stage in SRCs"$stopDiv - # Run SRC bankfull estimation routine routine - Tstart - time python3 /foss_fim/src/identify_src_bankfull.py -fim_dir $outputRunDataDir -flows $bankfull_flows_file -j $jobLimit - Tcount -fi - -## RUN SYNTHETIC RATING SUBDIVISION ROUTINE ## -if [ "$src_subdiv_toggle" = "True" ]; then - echo -e $startDiv"Performing SRC channel/overbank subdivision routine"$stopDiv - # Run SRC Subdivision & Variable Roughness routine - Tstart - time python3 /foss_fim/src/subdiv_chan_obank_src.py -fim_dir $outputRunDataDir -mann $vmann_input_file -j $jobLimit - Tcount -fi - -## CONNECT TO CALIBRATION POSTGRESQL DATABASE (OPTIONAL) ## -if [ "$src_adjust_spatial" = "True" ]; then - if [ ! -f $CALB_DB_KEYS_FILE ]; then - echo "ERROR! 
- the src_adjust_spatial parameter in the params_template.env (or equiv) is set to "True" (see parameter file), but the provided calibration database access keys file does not exist: $CALB_DB_KEYS_FILE" - exit 1 - else - source $CALB_DB_KEYS_FILE - : ' - This makes the local variables from the calb_db_keys files - into global variables that can be used in other files, including python. - - Why not just leave the word export in front of each of the keys in the - calb_db_keys.env? Becuase that file is used against docker-compose - when we start up that part of the sytem and it does not like the word - export. - ' - export CALIBRATION_DB_HOST=$CALIBRATION_DB_HOST - export CALIBRATION_DB_NAME=$CALIBRATION_DB_NAME - export CALIBRATION_DB_USER_NAME=$CALIBRATION_DB_USER_NAME - export CALIBRATION_DB_PASS=$CALIBRATION_DB_PASS - export DEFAULT_FIM_PROJECTION_CRS=$DEFAULT_FIM_PROJECTION_CRS - echo "Populate PostgrSQL database with benchmark FIM extent points and HUC attributes (the calibration database)" - echo "Loading HUC Data" - time ogr2ogr -overwrite -nln hucs -t_srs $DEFAULT_FIM_PROJECTION_CRS -f PostgreSQL PG:"host=$CALIBRATION_DB_HOST dbname=$CALIBRATION_DB_NAME user=$CALIBRATION_DB_USER_NAME password=$CALIBRATION_DB_PASS" $inputDataDir/wbd/WBD_National.gpkg WBDHU8 - echo "Loading Point Data" - time ogr2ogr -overwrite -t_srs $DEFAULT_FIM_PROJECTION_CRS -f PostgreSQL PG:"host=$CALIBRATION_DB_HOST dbname=$CALIBRATION_DB_NAME user=$CALIBRATION_DB_USER_NAME password=$CALIBRATION_DB_PASS" $fim_obs_pnt_data usgs_nws_benchmark_points -nln points - fi -fi - -## RUN SYNTHETIC RATING CURVE CALIBRATION W/ USGS GAGE RATING CURVES ## -if [ "$src_adjust_usgs" = "True" ] && [ "$src_subdiv_toggle" = "True" ]; then - Tstart - echo -e $startDiv"Performing SRC adjustments using USGS rating curve database"$stopDiv - # Run SRC Optimization routine using USGS rating curve data (WSE and flow @ NWM recur flow thresholds) - python3 $srcDir/src_adjust_usgs_rating.py -run_dir $outputRunDataDir -usgs_rc $inputDataDir/usgs_gages/usgs_rating_curves.csv -nwm_recur $nwm_recur_file -j $jobLimit - Tcount - date -u -fi - -## RUN SYNTHETIC RATING CURVE CALIBRATION W/ BENCHMARK POINT DATABASE (POSTGRESQL) ## -if [ "$src_adjust_spatial" = "True" ] && [ "$src_subdiv_toggle" = "True" ]; then - Tstart - echo -e $startDiv"Performing SRC adjustments using benchmark point database"$stopDiv - python3 $srcDir/src_adjust_spatial_obs.py -fim_dir $outputRunDataDir -j $jobLimit - Tcount - date -u -fi - -echo -echo -e $startDiv"Combining crosswalk tables"$stopDiv -# aggregate outputs -Tstart -python3 /foss_fim/tools/gms_tools/combine_crosswalk_tables.py -d $outputRunDataDir -o $outputRunDataDir/crosswalk_table.csv -Tcount -date -u - -echo -echo "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++" -echo "---- gms_run_post_processing complete" -echo "---- Ended: `date -u`" -Calc_Duration $post_proc_start_time -echo diff --git a/gms_run_unit.sh b/gms_run_unit.sh deleted file mode 100755 index dd4b46d08..000000000 --- a/gms_run_unit.sh +++ /dev/null @@ -1,208 +0,0 @@ -#!/bin/bash -e -: -usage () -{ - echo 'Produce GMS hydrofabric datasets for unit scale.' - echo 'Usage : gms_run_unit.sh [REQ: -u -n ]' - echo ' [OPT: -h -c -j -o' - echo ' -ud ' - echo ' -zd ]' - echo '' - echo 'REQUIRED:' - echo ' -u/--hucList : HUC8s to run or multiple passed in quotes (space delimited) file.' - echo ' A line delimited file is also acceptable. HUCs must present in inputs directory.' 
- echo ' -n/--runName : a name to tag the output directories and log files as. could be a version tag.' - echo '' - echo 'OPTIONS:' - echo ' -h/--help : help file' - echo ' -c/--config : configuration file with bash environment variables to export' - echo ' Default (if arg not added) : /foss_fim/config/params_template.env' - echo ' -ud/--unitDenylist : A file with a line delimited list of files in UNIT (HUC) directories to be removed' - echo ' upon completion (see config/deny_gms_unit.lst for a starting point)' - echo ' Default (if arg not added) : /foss_fim/config/deny_gms_unit.lst' - echo ' -- Note: if you want to keep all output files (aka.. no files removed),' - echo ' use the word NONE as this value for this parameter.' - echo ' -zd/--branchZeroDenylist : A file with a line delimited list of files in BRANCH ZERO directories to' - echo ' be removed upon completion of branch zero processing.' - echo ' (see config/deny_gms_branch_zero.lst for a starting point)' - echo ' Default: /foss_fim/config/deny_gms_branch_zero.lst' - echo ' -- Note: if you want to keep all output files (aka.. no files removed),' - echo ' use the word NONE as this value for this parameter.' - echo ' -j/--jobLimit : max number of concurrent jobs to run. Default 1 job at time.' - echo ' stdout and stderr to terminal and logs. With >1 outputs progress and logs the rest' - echo ' -o/--overwrite : overwrite outputs if already exist' - echo - exit -} - -while [ "$1" != "" ]; do -case $1 -in - -u|--hucList) - shift - hucList="$1" - ;; - -c|--configFile ) - shift - envFile=$1 - ;; - -n|--runName) - shift - runName=$1 - ;; - -j|--jobLimit) - shift - jobLimit=$1 - ;; - -h|--help) - shift - usage - ;; - -o|--overwrite) - overwrite=1 - ;; - -ud|--unitDenylist) - shift - deny_unit_list=$1 - ;; - -zd|--branchZeroDenylist) - shift - deny_branch_zero_list_for_units=$1 - ;; - *) ;; - esac - shift -done - -# print usage if arguments empty -if [ "$hucList" = "" ] -then - echo "ERROR: Missing -u Huclist argument" - usage -fi -if [ "$runName" = "" ] -then - echo "ERROR: Missing -n run time name argument" - usage -fi - -if [ "$envFile" = "" ] -then - envFile=/foss_fim/config/params_template.env -fi - -if [ "$deny_unit_list" = "" ] -then - deny_unit_list=/foss_fim/config/deny_gms_unit.lst -elif [ "${deny_unit_list^^}" != "NONE" ] && [ ! -f "$deny_unit_list" ] -then - # NONE is not case sensitive - echo "Error: The -ud does not exist and is not the word NONE" - usage -fi - -if [ "$deny_branch_zero_list_for_units" = "" ] -then - deny_branch_zero_list_for_units=/foss_fim/config/deny_gms_branch_zero.lst -elif [ "${deny_branch_zero_list_for_units^^}" != "NONE" ] # NONE is not case sensitive -then - if [ ! -f "$deny_branch_zero_list_for_units" ] - then - echo "Error: The -zd does not exist and is not the word NONE" - usage - fi -fi - -if [ -z "$overwrite" ] -then - overwrite=0 -fi - -## SOURCE ENV FILE AND FUNCTIONS ## -source $envFile -source $srcDir/bash_functions.env -source $srcDir/bash_variables.env - -# default values -if [ "$jobLimit" = "" ] ; then - jobLimit=1 -fi - -## Define Outputs Data Dir & Log File## -export outputRunDataDir=$outputDataDir/$runName -logFile=$outputRunDataDir/logs/unit/summary_gms_unit.log - -if [ -d $outputRunDataDir ] && [ $overwrite -eq 0 ]; then - echo - echo "ERROR: Output dir $outputRunDataDir exists. Use overwrite -o to run." 
- echo - usage -fi - -## Define inputs -export overwrite=$overwrite -export deny_gms_unit_list=$deny_gms_unit_list -export deny_unit_list=$deny_unit_list -export deny_branch_zero_list_for_units=$deny_branch_zero_list_for_units - -# we are not using the variable output at this time, but keep it anways -num_hucs=$(python3 $srcDir/check_huc_inputs.py -u $hucList) - -# make dirs -if [ ! -d $outputRunDataDir ]; then - mkdir -p $outputRunDataDir -fi - -# remove these directories on a new or overwrite run -rm -rdf $outputRunDataDir/logs -rm -rdf $outputRunDataDir/branch_errors -rm -rdf $outputRunDataDir/unit_errors - -# we need to clean out the all log files and some other files overwrite or not -mkdir -p $outputRunDataDir/logs/unit -mkdir -p $outputRunDataDir/unit_errors -rm -f $outputRunDataDir/gms_inputs* - -# copy over config file -cp -a $envFile $outputRunDataDir - -## RUN GMS BY UNIT ## -echo -echo "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++" -echo "---- Start of gms_run_unit" -echo "---- Started: `date -u`" -all_units_start_time=`date +%s` - -## GMS BY UNIT## -if [ -f "$hucList" ]; then - if [ "$jobLimit" -eq 1 ]; then - parallel --verbose --lb -j $jobLimit --joblog $logFile -- $srcDir/gms/time_and_tee_run_by_unit.sh :::: $hucList - else - parallel --eta -j $jobLimit --joblog $logFile -- $srcDir/gms/time_and_tee_run_by_unit.sh :::: $hucList - fi -else - if [ "$jobLimit" -eq 1 ]; then - parallel --verbose --lb -j $jobLimit --joblog $logFile -- $srcDir/gms/time_and_tee_run_by_unit.sh ::: $hucList - else - parallel --eta -j $jobLimit --joblog $logFile -- $srcDir/gms/time_and_tee_run_by_unit.sh ::: $hucList - fi - fi - -echo "Unit (HUC) processing is complete" -date -u - -## GET NON ZERO EXIT CODES ## -# Needed in case aggregation fails, we will need the logs -echo -e $startDiv"Start of non zero exit codes check"$stopDiv -find $outputRunDataDir/logs/ -name "*_unit.log" -type f | xargs grep -E "Exit status: ([1-9][0-9]{0,2})" >"$outputRunDataDir/unit_errors/non_zero_exit_codes.log" & - -## AGGREGATE BRANCH LISTS INTO ONE ## -echo -e $startDiv"Start branch aggregation"$stopDiv -python3 $srcDir/gms/aggregate_branch_lists.py -d $outputRunDataDir -f "gms_inputs.csv" -l $hucList - -echo -echo "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++" -echo "---- gms_run_unit is complete" -echo "---- Ended: `date -u`" -Calc_Duration $all_units_start_time -echo \ No newline at end of file diff --git a/src/add_crosswalk.py b/src/add_crosswalk.py index 88398aa95..a690dfce4 100755 --- a/src/add_crosswalk.py +++ b/src/add_crosswalk.py @@ -17,7 +17,22 @@ @mem_profile -def add_crosswalk(input_catchments_fileName,input_flows_fileName,input_srcbase_fileName,output_catchments_fileName,output_flows_fileName,output_src_fileName,output_src_json_fileName,output_crosswalk_fileName,output_hydro_table_fileName,input_huc_fileName,input_nwmflows_fileName,input_nwmcatras_fileName,mannings_n,input_nwmcat_fileName,extent,small_segments_filename,calibration_mode=False): +def add_crosswalk(input_catchments_fileName, + input_flows_fileName, + input_srcbase_fileName, + output_catchments_fileName, + output_flows_fileName, + output_src_fileName, + output_src_json_fileName, + output_crosswalk_fileName, + output_hydro_table_fileName, + input_huc_fileName, + input_nwmflows_fileName, + input_nwmcatras_fileName, + mannings_n, + input_nwmcat_fileName, + small_segments_filename, + calibration_mode=False): input_catchments = gpd.read_file(input_catchments_fileName) input_flows = 
gpd.read_file(input_flows_fileName) @@ -26,95 +41,63 @@ def add_crosswalk(input_catchments_fileName,input_flows_fileName,input_srcbase_f min_catchment_area = float(os.environ['min_catchment_area']) #0.25# min_stream_length = float(os.environ['min_stream_length']) #0.5# - if extent == 'FR': - ## crosswalk using majority catchment method + ## crosswalk using stream segment midpoint method + input_nwmcat = gpd.read_file(input_nwmcat_fileName, mask=input_huc) - # calculate majority catchments - majority_calc = zonal_stats(input_catchments, input_nwmcatras_fileName, stats=['majority'], geojson_out=True) - input_majorities = gpd.GeoDataFrame.from_features(majority_calc) - input_majorities = input_majorities.rename(columns={'majority' : 'feature_id'}) + input_nwmcat = input_nwmcat.rename(columns={'ID':'feature_id'}) + if input_nwmcat.feature_id.dtype != 'int': input_nwmcat.feature_id = input_nwmcat.feature_id.astype(int) + input_nwmcat=input_nwmcat.set_index('feature_id') - input_majorities = input_majorities[:][input_majorities['feature_id'].notna()] - if input_majorities.feature_id.dtype != 'int': input_majorities.feature_id = input_majorities.feature_id.astype(int) - if input_majorities.HydroID.dtype != 'int': input_majorities.HydroID = input_majorities.HydroID.astype(int) + input_nwmflows = input_nwmflows.rename(columns={'ID':'feature_id'}) + if input_nwmflows.feature_id.dtype != 'int': input_nwmflows.feature_id = input_nwmflows.feature_id.astype(int) - input_nwmflows = input_nwmflows.rename(columns={'ID':'feature_id'}) - if input_nwmflows.feature_id.dtype != 'int': input_nwmflows.feature_id = input_nwmflows.feature_id.astype(int) - relevant_input_nwmflows = input_nwmflows[input_nwmflows['feature_id'].isin(input_majorities['feature_id'])] - relevant_input_nwmflows = relevant_input_nwmflows.filter(items=['feature_id','order_']) + # Get stream midpoint + stream_midpoint = [] + hydroID = [] + for i,lineString in enumerate(input_flows.geometry): + hydroID = hydroID + [input_flows.loc[i,'HydroID']] + stream_midpoint = stream_midpoint + [lineString.interpolate(0.5,normalized=True)] - if input_catchments.HydroID.dtype != 'int': input_catchments.HydroID = input_catchments.HydroID.astype(int) - output_catchments = input_catchments.merge(input_majorities[['HydroID','feature_id']],on='HydroID') - output_catchments = output_catchments.merge(relevant_input_nwmflows[['order_','feature_id']],on='feature_id') + input_flows_midpoint = gpd.GeoDataFrame({'HydroID':hydroID, 'geometry':stream_midpoint}, crs=input_flows.crs, geometry='geometry') + input_flows_midpoint = input_flows_midpoint.set_index('HydroID') - if input_flows.HydroID.dtype != 'int': input_flows.HydroID = input_flows.HydroID.astype(int) - output_flows = input_flows.merge(input_majorities[['HydroID','feature_id']],on='HydroID') - if output_flows.HydroID.dtype != 'int': output_flows.HydroID = output_flows.HydroID.astype(int) - output_flows = output_flows.merge(relevant_input_nwmflows[['order_','feature_id']],on='feature_id') - output_flows = output_flows.merge(output_catchments.filter(items=['HydroID','areasqkm']),on='HydroID') + # Create crosswalk + crosswalk = gpd.sjoin(input_flows_midpoint, input_nwmcat, how='left', op='within').reset_index() + crosswalk = crosswalk.rename(columns={"index_right": "feature_id"}) - elif (extent == 'MS') | (extent == 'GMS'): - ## crosswalk using stream segment midpoint method - input_nwmcat = gpd.read_file(input_nwmcat_fileName, mask=input_huc) + # fill in missing ms + crosswalk_missing = 
crosswalk.loc[crosswalk.feature_id.isna()] + for index, stream in crosswalk_missing.iterrows(): - # only reduce nwm catchments to mainstems if running mainstems - if extent == 'MS': - input_nwmcat = input_nwmcat.loc[input_nwmcat.mainstem==1] + # find closest nwm catchment by distance + distances = [stream.geometry.distance(poly) for poly in input_nwmcat.geometry] + min_dist = min(distances) + nwmcat_index=distances.index(min_dist) - input_nwmcat = input_nwmcat.rename(columns={'ID':'feature_id'}) - if input_nwmcat.feature_id.dtype != 'int': input_nwmcat.feature_id = input_nwmcat.feature_id.astype(int) - input_nwmcat=input_nwmcat.set_index('feature_id') + # update crosswalk + crosswalk.loc[crosswalk.HydroID==stream.HydroID,'feature_id'] = input_nwmcat.iloc[nwmcat_index].name + crosswalk.loc[crosswalk.HydroID==stream.HydroID,'AreaSqKM'] = input_nwmcat.iloc[nwmcat_index].AreaSqKM + crosswalk.loc[crosswalk.HydroID==stream.HydroID,'Shape_Length'] = input_nwmcat.iloc[nwmcat_index].Shape_Length + crosswalk.loc[crosswalk.HydroID==stream.HydroID,'Shape_Area'] = input_nwmcat.iloc[nwmcat_index].Shape_Area - input_nwmflows = input_nwmflows.rename(columns={'ID':'feature_id'}) - if input_nwmflows.feature_id.dtype != 'int': input_nwmflows.feature_id = input_nwmflows.feature_id.astype(int) + crosswalk = crosswalk.filter(items=['HydroID', 'feature_id']) + crosswalk = crosswalk.merge(input_nwmflows[['feature_id','order_']],on='feature_id') - # Get stream midpoint - stream_midpoint = [] - hydroID = [] - for i,lineString in enumerate(input_flows.geometry): - hydroID = hydroID + [input_flows.loc[i,'HydroID']] - stream_midpoint = stream_midpoint + [lineString.interpolate(0.5,normalized=True)] + if len(crosswalk) < 1: + print ("No relevant streams within HUC boundaries.") + sys.exit(0) - input_flows_midpoint = gpd.GeoDataFrame({'HydroID':hydroID, 'geometry':stream_midpoint}, crs=input_flows.crs, geometry='geometry') - input_flows_midpoint = input_flows_midpoint.set_index('HydroID') + if input_catchments.HydroID.dtype != 'int': input_catchments.HydroID = input_catchments.HydroID.astype(int) + output_catchments = input_catchments.merge(crosswalk,on='HydroID') - # Create crosswalk - crosswalk = gpd.sjoin(input_flows_midpoint, input_nwmcat, how='left', op='within').reset_index() - crosswalk = crosswalk.rename(columns={"index_right": "feature_id"}) + if input_flows.HydroID.dtype != 'int': input_flows.HydroID = input_flows.HydroID.astype(int) + output_flows = input_flows.merge(crosswalk,on='HydroID') - # fill in missing ms - crosswalk_missing = crosswalk.loc[crosswalk.feature_id.isna()] - for index, stream in crosswalk_missing.iterrows(): + # Consider adding filter_catchments_and_add_attributes.py to run_by_branch.sh + if 'areasqkm' not in output_catchments.columns: + output_catchments['areasqkm'] = output_catchments.geometry.area/(1000**2) - # find closest nwm catchment by distance - distances = [stream.geometry.distance(poly) for poly in input_nwmcat.geometry] - min_dist = min(distances) - nwmcat_index=distances.index(min_dist) - - # update crosswalk - crosswalk.loc[crosswalk.HydroID==stream.HydroID,'feature_id'] = input_nwmcat.iloc[nwmcat_index].name - crosswalk.loc[crosswalk.HydroID==stream.HydroID,'AreaSqKM'] = input_nwmcat.iloc[nwmcat_index].AreaSqKM - crosswalk.loc[crosswalk.HydroID==stream.HydroID,'Shape_Length'] = input_nwmcat.iloc[nwmcat_index].Shape_Length - crosswalk.loc[crosswalk.HydroID==stream.HydroID,'Shape_Area'] = input_nwmcat.iloc[nwmcat_index].Shape_Area - - crosswalk = 
crosswalk.filter(items=['HydroID', 'feature_id']) - crosswalk = crosswalk.merge(input_nwmflows[['feature_id','order_']],on='feature_id') - - if len(crosswalk) < 1: - print ("No relevant streams within HUC boundaries.") - sys.exit(0) - - if input_catchments.HydroID.dtype != 'int': input_catchments.HydroID = input_catchments.HydroID.astype(int) - output_catchments = input_catchments.merge(crosswalk,on='HydroID') - - if input_flows.HydroID.dtype != 'int': input_flows.HydroID = input_flows.HydroID.astype(int) - output_flows = input_flows.merge(crosswalk,on='HydroID') - - # added for GMS. Consider adding filter_catchments_and_add_attributes.py to run_by_branch.sh - if 'areasqkm' not in output_catchments.columns: - output_catchments['areasqkm'] = output_catchments.geometry.area/(1000**2) - - output_flows = output_flows.merge(output_catchments.filter(items=['HydroID','areasqkm']),on='HydroID') + output_flows = output_flows.merge(output_catchments.filter(items=['HydroID','areasqkm']),on='HydroID') output_flows['ManningN'] = mannings_n @@ -212,22 +195,14 @@ def add_crosswalk(input_catchments_fileName,input_flows_fileName,input_srcbase_f for src_index, src_stage in new_values.iterrows(): output_src.loc[(output_src['HydroID']== short_id) & (output_src['Stage']== src_stage[0]),['Discharge (m3s-1)']] = src_stage[1] - if extent == 'FR': - output_src = output_src.merge(input_majorities[['HydroID','feature_id']],on='HydroID') - elif (extent == 'MS') | (extent == 'GMS'): - output_src = output_src.merge(crosswalk[['HydroID','feature_id']],on='HydroID') + output_src = output_src.merge(crosswalk[['HydroID','feature_id']],on='HydroID') output_crosswalk = output_src[['HydroID','feature_id']] output_crosswalk = output_crosswalk.drop_duplicates(ignore_index=True) - ## bathy estimation integration in synthetic rating curve calculations - #if (bathy_src_calc == True and extent == 'MS'): - # output_src = bathy_rc_lookup(output_src,input_bathy_fileName,output_bathy_fileName,output_bathy_streamorder_fileName,output_bathy_thalweg_fileName,output_bathy_xs_lookup_fileName) - #else: - # print('Note: NOT using bathy estimation approach to modify the SRC...') - # make hydroTable output_hydro_table = output_src.loc[:,['HydroID','feature_id','NextDownID','order_','Number of Cells','SurfaceArea (m2)','BedArea (m2)','TopWidth (m)','LENGTHKM','AREASQKM','WettedPerimeter (m)','HydraulicRadius (m)','WetArea (m2)','Volume (m3)','SLOPE','ManningN','Stage','Discharge (m3s-1)']] + output_hydro_table.rename(columns={'Stage' : 'stage','Discharge (m3s-1)':'discharge_cms'},inplace=True) ## Set placeholder variables to be replaced in post-processing (as needed). 
Create here to ensure consistent column vars ## These variables represent the original unmodified values @@ -298,7 +273,7 @@ def add_crosswalk(input_catchments_fileName,input_flows_fileName,input_srcbase_f if __name__ == '__main__': - parser = argparse.ArgumentParser(description='Crosswalk for MS/FR networks; calculate synthetic rating curves; update short rating curves') + parser = argparse.ArgumentParser(description='Crosswalk for networks; calculate synthetic rating curves; update short rating curves') parser.add_argument('-d','--input-catchments-fileName', help='DEM derived catchments', required=True) parser.add_argument('-a','--input-flows-fileName', help='DEM derived streams', required=True) parser.add_argument('-s','--input-srcbase-fileName', help='Base synthetic rating curve table', required=True) @@ -313,12 +288,14 @@ def add_crosswalk(input_catchments_fileName,input_flows_fileName,input_srcbase_f parser.add_argument('-y','--input-nwmcatras-fileName',help='NWM catchment raster',required=False) parser.add_argument('-m','--mannings-n',help='Mannings n. Accepts single parameter set or list of parameter set in calibration mode. Currently input as csv.',required=True) parser.add_argument('-z','--input-nwmcat-fileName',help='NWM catchment polygon',required=True) - parser.add_argument('-p','--extent',help='MS or FR extent',required=True) parser.add_argument('-k','--small-segments-filename',help='output list of short segments',required=True) parser.add_argument('-c','--calibration-mode',help='Mannings calibration flag',required=False,action='store_true') args = vars(parser.parse_args()) + add_crosswalk(**args) + + ''' input_catchments_fileName = args['input_catchments_fileName'] input_flows_fileName = args['input_flows_fileName'] input_srcbase_fileName = args['input_srcbase_fileName'] @@ -333,8 +310,13 @@ def add_crosswalk(input_catchments_fileName,input_flows_fileName,input_srcbase_f input_nwmcatras_fileName = args['input_nwmcatras_fileName'] mannings_n = args['mannings_n'] input_nwmcat_fileName = args['input_nwmcat_fileName'] - extent = args['extent'] small_segments_filename = args['small_segments_filename'] calibration_mode = args['calibration_mode'] - add_crosswalk(input_catchments_fileName,input_flows_fileName,input_srcbase_fileName,output_catchments_fileName,output_flows_fileName,output_src_fileName,output_src_json_fileName,output_crosswalk_fileName,output_hydro_table_fileName,input_huc_fileName,input_nwmflows_fileName,input_nwmcatras_fileName,mannings_n,input_nwmcat_fileName,extent,small_segments_filename,calibration_mode) + add_crosswalk(input_catchments_fileName, + input_flows_fileName, + input_srcbase_fileName, + output_catchments_fileName, + output_flows_fileName, + output_src_fileName,output_src_json_fileName,output_crosswalk_fileName,output_hydro_table_fileName,input_huc_fileName,input_nwmflows_fileName,input_nwmcatras_fileName,mannings_n,input_nwmcat_fileName,small_segments_filename,calibration_mode) + ''' \ No newline at end of file diff --git a/src/bash_variables.env b/src/bash_variables.env index 336ed61e3..c2ba361ab 100644 --- a/src/bash_variables.env +++ b/src/bash_variables.env @@ -1,7 +1,6 @@ ## Define inputs # NOTE: $inputDataDir is defined in Dockerfile export DEFAULT_FIM_PROJECTION_CRS=EPSG:5070 -export extent=GMS export input_DEM=$inputDataDir/3dep_dems/10m_5070/fim_seamless_3dep_dem_10m_5070.vrt export input_DEM_domain=$inputDataDir/3dep_dems/10m_5070/HUC6_dem_domain.gpkg export input_GL_boundaries=$inputDataDir/landsea/gl_water_polygons.gpkg diff --git 
a/src/gms/buffer_stream_branches.py b/src/buffer_stream_branches.py similarity index 100% rename from src/gms/buffer_stream_branches.py rename to src/buffer_stream_branches.py diff --git a/src/gms/clip_rasters_to_branches.py b/src/clip_rasters_to_branches.py similarity index 100% rename from src/gms/clip_rasters_to_branches.py rename to src/clip_rasters_to_branches.py diff --git a/src/gms/crosswalk_nwm_demDerived.py b/src/crosswalk_nwm_demDerived.py similarity index 99% rename from src/gms/crosswalk_nwm_demDerived.py rename to src/crosswalk_nwm_demDerived.py index 97f815ffd..55ef7e36e 100755 --- a/src/gms/crosswalk_nwm_demDerived.py +++ b/src/crosswalk_nwm_demDerived.py @@ -4,12 +4,12 @@ import pandas as pd import numpy as np import argparse +import stream_branches as sb + from utils.shared_functions import getDriver from utils.shared_variables import FIM_ID -from gms import stream_branches as sb from shapely.geometry import MultiLineString - def Crosswalk_nwm_demDerived(nwm_streams, demDerived, wbd=None, node_prefix=None, sampling_size=None, crosswalk_outfile=None, demDerived_outfile=None, nwm_outfile=None, verbose=False): diff --git a/src/gms/delineate_hydros_and_produce_HAND.sh b/src/delineate_hydros_and_produce_HAND.sh similarity index 94% rename from src/gms/delineate_hydros_and_produce_HAND.sh rename to src/delineate_hydros_and_produce_HAND.sh index c27de85cd..022c36e0e 100755 --- a/src/gms/delineate_hydros_and_produce_HAND.sh +++ b/src/delineate_hydros_and_produce_HAND.sh @@ -11,7 +11,7 @@ if [ "$mask_leveed_area_toggle" = "True" ] && [ -f $outputHucDataDir/LeveeProtec echo -e $startDiv"Mask levee-protected areas from DEM (*Overwrite dem_meters.tif output) $hucNumber $branch_zero_id" date -u Tstart - python3 -m memory_profiler $srcDir/gms/mask_dem.py -dem $outputCurrentBranchDataDir/dem_meters_$current_branch_id.tif -nld $outputHucDataDir/LeveeProtectedAreas_subset.gpkg -out $outputCurrentBranchDataDir/dem_meters_$current_branch_id.tif -s $outputHucDataDir/nwm_subset_streams_levelPaths.gpkg -i $current_branch_id -b0 $branch_zero_id + python3 -m memory_profiler $srcDir/mask_dem.py -dem $outputCurrentBranchDataDir/dem_meters_$current_branch_id.tif -nld $outputHucDataDir/LeveeProtectedAreas_subset.gpkg -out $outputCurrentBranchDataDir/dem_meters_$current_branch_id.tif -s $outputHucDataDir/nwm_subset_streams_levelPaths.gpkg -i $current_branch_id -b0 $branch_zero_id Tcount fi @@ -103,7 +103,7 @@ Tcount echo -e $startDiv"D8 REM $hucNumber $current_branch_id" date -u Tstart -$srcDir/gms/make_rem.py -d $outputCurrentBranchDataDir/dem_thalwegCond_"$current_branch_id".tif -w $outputCurrentBranchDataDir/gw_catchments_pixels_$current_branch_id.tif -o $outputCurrentBranchDataDir/rem_$current_branch_id.tif -t $outputCurrentBranchDataDir/demDerived_streamPixels_$current_branch_id.tif +$srcDir/make_rem.py -d $outputCurrentBranchDataDir/dem_thalwegCond_"$current_branch_id".tif -w $outputCurrentBranchDataDir/gw_catchments_pixels_$current_branch_id.tif -o $outputCurrentBranchDataDir/rem_$current_branch_id.tif -t $outputCurrentBranchDataDir/demDerived_streamPixels_$current_branch_id.tif Tcount ## BRING DISTANCE DOWN TO ZERO & MASK TO CATCHMENTS## @@ -183,5 +183,5 @@ elif [ "$level" = "unit" ]; then b_arg=$outputHucDataDir/nwm_subset_streams.gpkg z_arg=$outputHucDataDir/nwm_catchments_proj_subset.gpkg fi -python3 -m memory_profiler $srcDir/add_crosswalk.py -d $outputCurrentBranchDataDir/gw_catchments_reaches_filtered_addedAttributes_$current_branch_id.gpkg -a 
$outputCurrentBranchDataDir/demDerived_reaches_split_filtered_$current_branch_id.gpkg -s $outputCurrentBranchDataDir/src_base_$current_branch_id.csv -l $outputCurrentBranchDataDir/gw_catchments_reaches_filtered_addedAttributes_crosswalked_$current_branch_id.gpkg -f $outputCurrentBranchDataDir/demDerived_reaches_split_filtered_addedAttributes_crosswalked_$current_branch_id.gpkg -r $outputCurrentBranchDataDir/src_full_crosswalked_$current_branch_id.csv -j $outputCurrentBranchDataDir/src_$current_branch_id.json -x $outputCurrentBranchDataDir/crosswalk_table_$current_branch_id.csv -t $outputCurrentBranchDataDir/hydroTable_$current_branch_id.csv -w $outputHucDataDir/wbd8_clp.gpkg -b $b_arg -y $outputCurrentBranchDataDir/nwm_catchments_proj_subset.tif -m $manning_n -z $z_arg -p $extent -k $outputCurrentBranchDataDir/small_segments_$current_branch_id.csv +python3 -m memory_profiler $srcDir/add_crosswalk.py -d $outputCurrentBranchDataDir/gw_catchments_reaches_filtered_addedAttributes_$current_branch_id.gpkg -a $outputCurrentBranchDataDir/demDerived_reaches_split_filtered_$current_branch_id.gpkg -s $outputCurrentBranchDataDir/src_base_$current_branch_id.csv -l $outputCurrentBranchDataDir/gw_catchments_reaches_filtered_addedAttributes_crosswalked_$current_branch_id.gpkg -f $outputCurrentBranchDataDir/demDerived_reaches_split_filtered_addedAttributes_crosswalked_$current_branch_id.gpkg -r $outputCurrentBranchDataDir/src_full_crosswalked_$current_branch_id.csv -j $outputCurrentBranchDataDir/src_$current_branch_id.json -x $outputCurrentBranchDataDir/crosswalk_table_$current_branch_id.csv -t $outputCurrentBranchDataDir/hydroTable_$current_branch_id.csv -w $outputHucDataDir/wbd8_clp.gpkg -b $b_arg -y $outputCurrentBranchDataDir/nwm_catchments_proj_subset.tif -m $manning_n -z $z_arg -k $outputCurrentBranchDataDir/small_segments_$current_branch_id.csv Tcount diff --git a/src/gms/derive_level_paths.py b/src/derive_level_paths.py similarity index 97% rename from src/gms/derive_level_paths.py rename to src/derive_level_paths.py index fd74ddbd3..f34dc494c 100755 --- a/src/gms/derive_level_paths.py +++ b/src/derive_level_paths.py @@ -28,18 +28,18 @@ def Derive_level_paths(in_stream_network, out_stream_network, branch_id_attribut stream_network = StreamNetwork.from_file(filename=in_stream_network) else: print("Sorry, no branches exist and processing can not continue. This could be an empty file.") - sys.exit(FIM_exit_codes.GMS_UNIT_NO_BRANCHES.value) # will send a 60 back + sys.exit(FIM_exit_codes.UNIT_NO_BRANCHES.value) # will send a 60 back # if there are no reaches at this point if (len(stream_network) == 0): # This is technically not an error but we need to have it logged so the user know what # happened to it and we need the huc to not be included in future processing. - # We need it to be not included in the gms_input.csv at the end of the unit processing. + # We need it to be not included in the fim_input.csv at the end of the unit processing. # Throw an exception with valid text. This will show up in the non-zero exit codes and explain why an error. # Later, we can look at creating custom sys exit codes # raise UserWarning("Sorry, no branches exist and processing can not continue. This could be an empty file.") print("Sorry, no branches exist and processing can not continue. 
This could be an empty file.") - sys.exit(FIM_exit_codes.GMS_UNIT_NO_BRANCHES.value) # will send a 60 back + sys.exit(FIM_exit_codes.UNIT_NO_BRANCHES.value) # will send a 60 back # values_exluded of 1 and 2 mean where are dropping stream orders 1 and 2. We are leaving those # for branch zero. diff --git a/src/gms/edit_points.py b/src/edit_points.py similarity index 100% rename from src/gms/edit_points.py rename to src/edit_points.py diff --git a/src/gms/filter_gms_inputs_by_huc.py b/src/filter_inputs_by_huc.py similarity index 52% rename from src/gms/filter_gms_inputs_by_huc.py rename to src/filter_inputs_by_huc.py index 66ed631eb..4543adacc 100755 --- a/src/gms/filter_gms_inputs_by_huc.py +++ b/src/filter_inputs_by_huc.py @@ -3,7 +3,7 @@ import pandas as pd import argparse -def filter_gms_inputs_by_huc(gms_inputs,hucs,gms_outputs): +def filter_inputs_by_huc(fim_inputs, hucs, fim_outputs): try: with open(hucs[0]) as hf: @@ -11,23 +11,23 @@ def filter_gms_inputs_by_huc(gms_inputs,hucs,gms_outputs): except FileNotFoundError: hucsList = set(hucs) - gms_inputs = pd.read_csv(gms_inputs,header=None,dtype=str) - gms_inputs_mask = gms_inputs.loc[:,0].isin(hucsList) - gms_inputs = gms_inputs.loc[gms_inputs_mask,:] + fim_inputs = pd.read_csv(fim_inputs,header=None,dtype=str) + fim_inputs_mask = fim_inputs.loc[:,0].isin(hucsList) + fim_inputs = fim_inputs.loc[fim_inputs_mask,:] - assert len(gms_inputs) > 0, "Filtered GMS list is empty" + assert len(fim_inputs) > 0, "Filtered FIM list is empty" - gms_inputs.to_csv(gms_outputs,index=False,header=False) + fim_inputs.to_csv(fim_outputs, index=False, header=False) if __name__ == '__main__': # Parse arguments. parser = argparse.ArgumentParser(description='Adjusts the elevation of the thalweg to the lateral zonal minimum.') - parser.add_argument('-g','--gms-inputs',help='Raster of elevation.',required=True) + parser.add_argument('-g','--fim-inputs',help='Raster of elevation.',required=True) parser.add_argument('-u','--hucs',help='Raster of elevation.',required=True,nargs='+') - parser.add_argument('-o','--gms-outputs',help='Raster of elevation.',required=True) + parser.add_argument('-o','--fim-outputs',help='Raster of elevation.',required=True) args = vars(parser.parse_args()) - filter_gms_inputs_by_huc(**args) + filter_inputs_by_huc(**args) diff --git a/src/gms/finalize_srcs.py b/src/finalize_srcs.py similarity index 100% rename from src/gms/finalize_srcs.py rename to src/finalize_srcs.py diff --git a/src/generate_branch_list.py b/src/generate_branch_list.py index 6e1240de4..b25dab876 100755 --- a/src/generate_branch_list.py +++ b/src/generate_branch_list.py @@ -5,7 +5,7 @@ import pandas as pd import sys -sys.path.append('/foss_fim/src/gms/') +#sys.path.append('/foss_fim/src') from stream_branches import StreamNetwork def generate_branch_list(stream_network_dissolved, branch_id_attribute, diff --git a/src/gms/__init__.py b/src/gms/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/src/gms/aggregate_branch_lists.py b/src/gms/aggregate_branch_lists.py deleted file mode 100755 index f83eb8c72..000000000 --- a/src/gms/aggregate_branch_lists.py +++ /dev/null @@ -1,57 +0,0 @@ -#!/usr/bin/env python3 - -import argparse -import pandas as pd -from os import environ -from os.path import join -from glob import glob - - -def aggregate_inputs_for_gms(huc_list, output_dir, output_file_name): - - # bash will send huclist in as a colletion and not a string - if isinstance(huc_list, list): - huc_list_file = huc_list[0] - else: - huc_list_file = 
huc_list - print(huc_list_file) - try: - huc_list = pd.read_csv(huc_list_file,header=None,dtype=str).loc[:,0].tolist() - except FileNotFoundError: - pass - - hucs = set(huc_list) - - # get branch lists - branch_id_files = glob(join(output_dir,'*','branch_id.lst')) - - all_huc_numbers,all_bids = [],[] - for bid_file in branch_id_files: - huc_number = bid_file.split('/')[-2] - - if huc_number in hucs: - bids = pd.read_csv(bid_file,header=None).loc[:,0].tolist() - huc_number_list = [huc_number] * len(bids) - - all_bids += bids - all_huc_numbers += huc_number_list - - output = pd.DataFrame({ - 'huc': all_huc_numbers, - 'branch' : all_bids - }) - - output.to_csv(join(output_dir, output_file_name),index=False,header=False) - - - -if __name__ == '__main__': - - parser = argparse.ArgumentParser(description='Aggregate GMS Inputs') - parser.add_argument('-d','--output_dir', help='output run data directory', required=True) - parser.add_argument('-f','--output_file_name', help='output file name', required=True) - parser.add_argument('-l','--huc_list', help='huc list', required=True,nargs='+') - - args = vars(parser.parse_args()) - - aggregate_inputs_for_gms(**args) diff --git a/src/gms/generate_branch_list.py b/src/gms/generate_branch_list.py deleted file mode 100755 index 269263f63..000000000 --- a/src/gms/generate_branch_list.py +++ /dev/null @@ -1,36 +0,0 @@ -#!/usr/bin/env python3 - -import os -import pandas as pd -from stream_branches import StreamNetwork -import argparse - -def Generate_branch_list(stream_network_dissolved, branch_id_attribute, output_branch_list, branch_zero): - - if os.path.exists(stream_network_dissolved): - # load stream network - stream_network_dissolved = StreamNetwork.from_file( stream_network_dissolved, - branch_id_attribute=branch_id_attribute ) - - # reduce to branch id attribute and convert to pandas df - stream_network_dissolved = stream_network_dissolved.loc[:,branch_id_attribute] - - # write - stream_network_dissolved.to_csv(output_branch_list,sep= " ",index=False,header=False) - - # Add branch zero ID to branch list - if branch_zero: - with open(output_branch_list, 'a') as branch_lst: - branch_lst.write(f'{branch_zero}') - -if __name__ == '__main__': - - parser = argparse.ArgumentParser(description='Create branch list') - parser.add_argument('-d','--stream-network-dissolved', help='Dissolved stream network', required=True) - parser.add_argument('-b','--branch-id-attribute', help='Branch ID attribute to use in dissolved stream network', required=True) - parser.add_argument('-o','--output-branch-list', help='Output branch list', required=True) - parser.add_argument('-z','--branch-zero', help='Optional Branch Zero ID (str) to be added to the branch list. Usually this will be "0".', required=False) - - args = vars(parser.parse_args()) - - Generate_branch_list(**args) diff --git a/src/gms/remove_error_branches.py b/src/gms/remove_error_branches.py deleted file mode 100755 index 30afbc44b..000000000 --- a/src/gms/remove_error_branches.py +++ /dev/null @@ -1,96 +0,0 @@ -#!/usr/bin/env python3 - -# Removes branches with Exit status: 61 following split_flows.py. -# Removes both the branch folder and the reference in gms_inputs.csv - -import os -import argparse -import pandas as pd -import shutil - -def remove_error_branches(logfile, gms_inputs): - if os.path.isfile(logfile): - try: - errors_df = pd.read_csv(logfile, sep=':', header=None) - except pd.errors.EmptyDataError: - print('\nLog file is empty. 
Skipping this HUC.\n') - return - - gms_inputs_df = pd.read_csv(gms_inputs, header=None, dtype={0:str,1:str}) - - # Make copy of gms_inputs.csv - gms_inputs_copy = os.path.splitext(gms_inputs)[0] + '_original.csv' - if not os.path.isfile(gms_inputs_copy): - gms_inputs_df.to_csv(gms_inputs_copy, header=False, index=False) - - gms_inputs_removed = os.path.splitext(gms_inputs)[0] + '_removed.csv' - if not os.path.isfile(gms_inputs_removed): - error_branches = None - else: - error_branches = pd.read_csv(gms_inputs_removed, header=None, dtype=str) - - first_occurrence = [] - for i, row in errors_df.iterrows(): - error_code = row[2] - - if error_code == 61: - dirname, basename = os.path.split(row[0]) - - filename = os.path.splitext(basename)[0] - - print(f"Removing {filename}") - - split = str.split(filename, '_') - - huc = split[0] - branch = split[3] - - if huc not in first_occurrence: - # Ignore previous removals for this HUC - if error_branches is not None: - error_branches = error_branches[error_branches[0] != huc] - - first_occurrence.append(huc) - - output_dir = os.path.split(os.path.split(dirname)[0])[0] - branch_dir = os.path.join(output_dir, huc, 'branches', branch) - if os.path.exists(branch_dir): - shutil.rmtree(branch_dir) - - # Remove bad branch from DataFrame - if branch in gms_inputs_df.loc[:,1].values: - gms_inputs_df = gms_inputs_df.drop(index=gms_inputs_df[gms_inputs_df.loc[:,1]==branch].index[0]) - - tmp_df = pd.DataFrame([huc, branch]).T - if error_branches is None: - error_branches = tmp_df - else: - error_branches = pd.concat([error_branches, tmp_df]) - - # Save list of removed branches - if error_branches is not None and len(error_branches) > 0: - pd.DataFrame(error_branches).to_csv(gms_inputs_removed, header=False, index=False) - - # Overwrite gms_inputs.csv with error branches removed - gms_inputs_df.to_csv(gms_inputs, header=False, index=False) - - print('\nDone removing error branches\n') - - else: - print('\nDone -- no branches to remove') - - else: - print('\nNo log file found\n') - - -if __name__ == '__main__': - - # parse arguments - parser = argparse.ArgumentParser(description='Remove branches with Exit status: 61') - parser.add_argument('-f','--logfile', help='Location of non_zero_exit_codes.log', required=True) - parser.add_argument('-g','--gms-inputs', help='Location of gms_inputs.csv', required=True) - - # extract to dictionary - args = vars(parser.parse_args()) - - remove_error_branches(args['logfile'], args['gms_inputs']) \ No newline at end of file diff --git a/src/gms/run_by_unit.sh b/src/gms/run_by_unit.sh deleted file mode 100755 index 25d5aeec8..000000000 --- a/src/gms/run_by_unit.sh +++ /dev/null @@ -1,272 +0,0 @@ -#!/bin/bash -e - -## INITIALIZE TOTAL TIME TIMER ## -T_total_start - -## SOURCE BASH FUNCTIONS -source $srcDir/bash_variables.env - -## SET OUTPUT DIRECTORY FOR UNIT ## -hucNumber="$1" - -: ' - Even though check_input_hucs at gms_run_unit validate that all values - are numbers, sometimes the huc list can come in as windows incoded and not - unix encoded. It can get missed but tee and time can parse it wrong. - so, we will strip a slash of the end if it exists, the re-validat that the - value is a number. (Note: doesn''t seem to work all of the time for encoding - issues (??)) -' -re='^[0-9]+$' -if ! 
[[ $hucNumber =~ $re ]] ; then - echo "Error: hucNumber is not a number" >&2; exit 1 -fi - -outputHucDataDir=$outputRunDataDir/$hucNumber -outputBranchDataDir=$outputHucDataDir/branches -current_branch_id=$branch_zero_id - -## huc data -if [ -d "$outputHucDataDir" ]; then - if [ $overwrite -eq 1 ]; then - rm -rf $outputHucDataDir - else - echo "Output dir $outputHucDataDir exists. Use overwrite -o to run." - fi -fi - -# make outputs directory -mkdir -p $outputHucDataDir - -# make branches outputs directory -if [ ! -d "$outputBranchDataDir" ]; then - mkdir -p $outputBranchDataDir -fi - -## SET VARIABLES AND FILE INPUTS ## -hucUnitLength=${#hucNumber} -huc4Identifier=${hucNumber:0:4} -huc2Identifier=${hucNumber:0:2} -input_NHD_WBHD_layer=WBDHU$hucUnitLength - -input_NLD=$inputDataDir/nld_vectors/huc2_levee_lines/nld_preprocessed_"$huc2Identifier".gpkg - -## START MESSAGE ## -echo -e $startDiv"Processing HUC: $hucNumber ..."$stopDiv - -# Define the landsea water body mask using either Great Lakes or Ocean polygon input # -if [[ $huc2Identifier == "04" ]] ; then - input_LANDSEA=$input_GL_boundaries - echo -e "Using $input_LANDSEA for water body mask (Great Lakes)" -else - input_LANDSEA=$inputDataDir/landsea/water_polygons_us.gpkg -fi - -## GET WBD ## -echo -e $startDiv"Get WBD $hucNumber"$stopDiv -date -u -Tstart -ogr2ogr -f GPKG -t_srs $DEFAULT_FIM_PROJECTION_CRS $outputHucDataDir/wbd.gpkg $input_WBD_gdb $input_NHD_WBHD_layer -where "HUC$hucUnitLength='$hucNumber'" -Tcount - -## Subset Vector Layers ## -echo -e $startDiv"Get Vector Layers and Subset $hucNumber"$stopDiv -date -u -Tstart - -cmd_args=" -a $outputHucDataDir/nwm_lakes_proj_subset.gpkg" -cmd_args+=" -b $outputHucDataDir/nwm_subset_streams.gpkg" -cmd_args+=" -d $hucNumber" -cmd_args+=" -e $outputHucDataDir/nwm_headwater_points_subset.gpkg" -cmd_args+=" -f $outputHucDataDir/wbd_buffered.gpkg" -cmd_args+=" -g $outputHucDataDir/wbd.gpkg" -cmd_args+=" -i $input_DEM" -cmd_args+=" -j $input_DEM_domain" -cmd_args+=" -l $input_nwm_lakes" -cmd_args+=" -m $input_nwm_catchments" -cmd_args+=" -n $outputHucDataDir/nwm_catchments_proj_subset.gpkg" -cmd_args+=" -r $input_NLD" -cmd_args+=" -v $input_LANDSEA" -cmd_args+=" -w $input_nwm_flows" -cmd_args+=" -x $outputHucDataDir/LandSea_subset.gpkg" -cmd_args+=" -y $input_nwm_headwaters" -cmd_args+=" -z $outputHucDataDir/nld_subset_levees.gpkg" -cmd_args+=" -wb $wbd_buffer" -cmd_args+=" -lpf $input_nld_levee_protected_areas" -cmd_args+=" -lps $outputHucDataDir/LeveeProtectedAreas_subset.gpkg" - -Tcount -#echo "$cmd_args" -python3 $srcDir/clip_vectors_to_wbd.py $cmd_args - -: ' -python3 $srcDir/clip_vectors_to_wbd.py -d $hucNumber -w $input_nwm_flows -l $input_nwm_lakes -r $input_NLD -g $outputHucDataDir/wbd.gpkg -f $outputHucDataDir/wbd_buffered.gpkg -m $input_nwm_catchments -y $input_nwm_headwaters -v $input_LANDSEA -lpf $input_nld_levee_protected_areas -z $outputHucDataDir/nld_subset_levees.gpkg -a $outputHucDataDir/nwm_lakes_proj_subset.gpkg -n $outputHucDataDir/nwm_catchments_proj_subset.gpkg -e $outputHucDataDir/nwm_headwater_points_subset.gpkg -b $outputHucDataDir/nwm_subset_streams.gpkg -x $outputHucDataDir/LandSea_subset.gpkg -lps $outputHucDataDir/LeveeProtectedAreas_subset.gpkg -wb $wbd_buffer -i $input_DEM -j $input_DEM_domain -' - -## Clip WBD8 ## -echo -e $startDiv"Clip WBD8"$stopDiv -date -u -Tstart -ogr2ogr -f GPKG -t_srs $DEFAULT_FIM_PROJECTION_CRS -clipsrc $outputHucDataDir/wbd_buffered.gpkg $outputHucDataDir/wbd8_clp.gpkg $inputDataDir/wbd/WBD_National.gpkg WBDHU8 -Tcount - -## DERIVE 
LEVELPATH ## -echo -e $startDiv"Generating Level Paths for $hucNumber"$stopDiv -date -u -Tstart -$srcDir/gms/derive_level_paths.py -i $outputHucDataDir/nwm_subset_streams.gpkg -b $branch_id_attribute -r "ID" -o $outputHucDataDir/nwm_subset_streams_levelPaths.gpkg -d $outputHucDataDir/nwm_subset_streams_levelPaths_dissolved.gpkg -e $outputHucDataDir/nwm_headwaters.gpkg -c $outputHucDataDir/nwm_catchments_proj_subset.gpkg -t $outputHucDataDir/nwm_catchments_proj_subset_levelPaths.gpkg -n $outputHucDataDir/nwm_subset_streams_levelPaths_dissolved_headwaters.gpkg -v -w $outputHucDataDir/nwm_lakes_proj_subset.gpkg - -# test if we received a non-zero code back from derive_level_paths.py -subscript_exit_code=$? -# we have to retrow it if it is not a zero (but it will stop further execution in this script) -if [ $subscript_exit_code -ne 0 ]; then exit $subscript_exit_code; fi -Tcount - -## STREAM BRANCH POLYGONS -echo -e $startDiv"Generating Stream Branch Polygons for $hucNumber"$stopDiv -date -u -Tstart -$srcDir/gms/buffer_stream_branches.py -a $input_DEM_domain -s $outputHucDataDir/nwm_subset_streams_levelPaths_dissolved.gpkg -i $branch_id_attribute -d $branch_buffer_distance_meters -b $outputHucDataDir/branch_polygons.gpkg -v -Tcount - -## CREATE BRANCHID LIST FILE -echo -e $startDiv"Create file of branch ids for $hucNumber"$stopDiv -date -u -Tstart -$srcDir/gms/generate_branch_list.py -o $outputHucDataDir/branch_id.lst -d $outputHucDataDir/nwm_subset_streams_levelPaths_dissolved.gpkg -b $branch_id_attribute -z $branch_zero_id -Tcount - -## CREATE BRANCH ZERO ## -echo -e $startDiv"Creating branch zero for $hucNumber"$stopDiv -outputCurrentBranchDataDir=$outputBranchDataDir/$branch_zero_id - -## OVERWRITE -if [ -d "$outputCurrentBranchDataDir" ];then - if [ $overwrite -eq 1 ]; then - rm -rf $outputCurrentBranchDataDir - else - echo "GMS branch data directories for $hucNumber - $branch_zero_id already exist. Use -o/--overwrite to continue" - exit 1 - fi -fi - -## MAKE OUTPUT BRANCH DIRECTORY -mkdir -p $outputCurrentBranchDataDir - -## CLIP RASTERS -echo -e $startDiv"Clipping rasters to branches $hucNumber $branch_zero_id"$stopDiv -# Note: don't need to use gdalwarp -cblend as we are using a buffered wbd -date -u -Tstart -[ ! 
-f $outputCurrentBranchDataDir/dem_meters.tif ] && \ -gdalwarp -cutline $outputHucDataDir/wbd_buffered.gpkg -crop_to_cutline -ot Float32 -r bilinear -of "GTiff" -overwrite -co "BLOCKXSIZE=512" -co "BLOCKYSIZE=512" -co "TILED=YES" -co "COMPRESS=LZW" -co "BIGTIFF=YES" -t_srs $DEFAULT_FIM_PROJECTION_CRS $input_DEM $outputCurrentBranchDataDir/dem_meters_$branch_zero_id.tif -Tcount - -## GET RASTER METADATA -echo -e $startDiv"Get DEM Metadata $hucNumber $branch_zero_id"$stopDiv -date -u -Tstart -read fsize ncols nrows ndv xmin ymin xmax ymax cellsize_resx cellsize_resy<<<$($srcDir/getRasterInfoNative.py $outputCurrentBranchDataDir/dem_meters_$branch_zero_id.tif) - -## RASTERIZE NLD MULTILINES ## -echo -e $startDiv"Rasterize all NLD multilines using zelev vertices $hucNumber $branch_zero_id"$stopDiv -date -u -Tstart -# REMAINS UNTESTED FOR AREAS WITH LEVEES -[ -f $outputHucDataDir/nld_subset_levees.gpkg ] && \ -gdal_rasterize -l nld_subset_levees -3d -at -a_nodata $ndv -te $xmin $ymin $xmax $ymax -ts $ncols $nrows -ot Float32 -of GTiff -co "BLOCKXSIZE=512" -co "BLOCKYSIZE=512" -co "COMPRESS=LZW" -co "BIGTIFF=YES" -co "TILED=YES" $outputHucDataDir/nld_subset_levees.gpkg $outputCurrentBranchDataDir/nld_subset_levees_$branch_zero_id.tif -Tcount - -## BURN LEVEES INTO DEM ## -echo -e $startDiv"Burn nld levees into dem & convert nld elev to meters (*Overwrite dem_meters.tif output) $hucNumber $branch_zero_id"$stopDiv -date -u -Tstart -# REMAINS UNTESTED FOR AREAS WITH LEVEES -[ -f $outputCurrentBranchDataDir/nld_subset_levees.tif ] && \ -python3 -m memory_profiler $srcDir/burn_in_levees.py -dem $outputCurrentBranchDataDir/dem_meters_$branch_zero_id.tif -nld $outputCurrentBranchDataDir/nld_subset_levees_$branch_zero_id.tif -out $outputCurrentBranchDataDir/dem_meters_$branch_zero_id.tif -Tcount - -## RASTERIZE REACH BOOLEAN (1 & 0) ## -echo -e $startDiv"Rasterize Reach Boolean $hucNumber $branch_zero_id"$stopDiv -date -u -Tstart -gdal_rasterize -ot Int32 -burn 1 -init 0 -co "COMPRESS=LZW" -co "BIGTIFF=YES" -co "TILED=YES" -te $xmin $ymin $xmax $ymax -ts $ncols $nrows $outputHucDataDir/nwm_subset_streams.gpkg $outputCurrentBranchDataDir/flows_grid_boolean_$branch_zero_id.tif -Tcount - -## RASTERIZE NWM Levelpath HEADWATERS (1 & 0) ## -echo -e $startDiv"Rasterize NWM Headwaters $hucNumber $branch_zero_id"$stopDiv -date -u -Tstart -gdal_rasterize -ot Int32 -burn 1 -init 0 -co "COMPRESS=LZW" -co "BIGTIFF=YES" -co "TILED=YES" -te $xmin $ymin $xmax $ymax -ts $ncols $nrows $outputHucDataDir/nwm_headwater_points_subset.gpkg $outputCurrentBranchDataDir/headwaters_$branch_zero_id.tif -Tcount - -## DEM Reconditioning ## -# Using AGREE methodology, hydroenforce the DEM so that it is consistent with the supplied stream network. -# This allows for more realistic catchment delineation which is ultimately reflected in the output FIM mapping. 
-echo -e $startDiv"Creating AGREE DEM using $agree_DEM_buffer meter buffer $hucNumber $branch_zero_id"$stopDiv -date -u -Tstart -python3 -m memory_profiler $srcDir/agreedem.py -r $outputCurrentBranchDataDir/flows_grid_boolean_$branch_zero_id.tif -d $outputCurrentBranchDataDir/dem_meters_$branch_zero_id.tif -w $outputCurrentBranchDataDir -g $outputCurrentBranchDataDir/temp_work -o $outputCurrentBranchDataDir/dem_burned_$branch_zero_id.tif -b $agree_DEM_buffer -sm 10 -sh 1000 -Tcount - -## PIT REMOVE BURNED DEM ## -echo -e $startDiv"Pit remove Burned DEM $hucNumber $branch_zero_id"$stopDiv -date -u -Tstart -rd_depression_filling $outputCurrentBranchDataDir/dem_burned_$branch_zero_id.tif $outputCurrentBranchDataDir/dem_burned_filled_$branch_zero_id.tif -Tcount - -## D8 FLOW DIR ## -echo -e $startDiv"D8 Flow Directions on Burned DEM $hucNumber $branch_zero_id"$stopDiv -date -u -Tstart -mpiexec -n $ncores_fd $taudemDir2/d8flowdir -fel $outputCurrentBranchDataDir/dem_burned_filled_$branch_zero_id.tif -p $outputCurrentBranchDataDir/flowdir_d8_burned_filled_$branch_zero_id.tif -Tcount - -## PRODUCE THE REM AND OTHER HAND FILE OUTPUTS ## -export hucNumber=$hucNumber -export current_branch_id=$current_branch_id -export outputCurrentBranchDataDir=$outputCurrentBranchDataDir -export outputHucDataDir=$outputHucDataDir -export ndv=$ndv -export xmin=$xmin -export ymin=$ymin -export xmax=$xmax -export ymax=$ymax -export ncols=$ncols -export nrows=$nrows - -## PRODUCE BRANCH ZERO HAND -$srcDir/gms/delineate_hydros_and_produce_HAND.sh "unit" - -## CREATE USGS GAGES FILE -if [ -f $outputHucDataDir/nwm_subset_streams_levelPaths.gpkg ]; then - echo -e $startDiv"Assigning USGS gages to branches for $hucNumber"$stopDiv - date -u - Tstart - python3 -m memory_profiler $srcDir/usgs_gage_unit_setup.py -gages $inputDataDir/usgs_gages/usgs_gages.gpkg -nwm $outputHucDataDir/nwm_subset_streams_levelPaths.gpkg -o $outputHucDataDir/usgs_subset_gages.gpkg -huc $hucNumber -ahps $inputDataDir/ahps_sites/nws_lid.gpkg -bzero_id $branch_zero_id - Tcount -fi - -## USGS CROSSWALK ## -if [ -f $outputHucDataDir/usgs_subset_gages_$branch_zero_id.gpkg ]; then - echo -e $startDiv"USGS Crosswalk $hucNumber $branch_zero_id"$stopDiv - date -u - Tstart - python3 $srcDir/usgs_gage_crosswalk.py -gages $outputHucDataDir/usgs_subset_gages_$branch_zero_id.gpkg -flows $outputCurrentBranchDataDir/demDerived_reaches_split_filtered_$branch_zero_id.gpkg -cat $outputCurrentBranchDataDir/gw_catchments_reaches_filtered_addedAttributes_crosswalked_$branch_zero_id.gpkg -dem $outputCurrentBranchDataDir/dem_meters_$branch_zero_id.tif -dem_adj $outputCurrentBranchDataDir/dem_thalwegCond_$branch_zero_id.tif -outtable $outputCurrentBranchDataDir/usgs_elev_table.csv -b $branch_zero_id - Tcount -fi - -## CLEANUP BRANCH ZERO OUTPUTS ## -echo -e $startDiv"Cleaning up outputs in branch zero $hucNumber"$stopDiv -$srcDir/gms/outputs_cleanup.py -d $outputCurrentBranchDataDir -l $deny_branch_zero_list_for_units -b 0 - - -## REMOVE FILES FROM DENY LIST ## -if [ -f $deny_unit_list ]; then - echo -e $startDiv"Remove files $hucNumber"$stopDiv - date -u - Tstart - $srcDir/gms/outputs_cleanup.py -d $outputHucDataDir -l $deny_unit_list -b $hucNumber - Tcount -fi diff --git a/src/gms/test_new_crosswalk.sh b/src/gms/test_new_crosswalk.sh deleted file mode 100755 index 620c94223..000000000 --- a/src/gms/test_new_crosswalk.sh +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash -e - - -python3 /foss_fim/src/gms/crosswalk_nwm_demDerived.py -n 
/data/outputs/first_batch_test_FR_c/12090301/nwm_subset_streams.gpkg -d /data/outputs/first_batch_test_FR_c/12090301/demDerived_reaches_split_filtered_addedAttributes_crosswalked.gpkg -v -c /data/temp/continuity/cross_walk_$1.csv -e /data/temp/continuity/demDerived_crosswalked_$1.gpkg -m /data/temp/continuity/nwm_traversal_$1.gpkg -w /data/temp/continuity/wbd.gpkg -a $1 diff --git a/src/gms/time_and_tee_run_by_branch.sh b/src/gms/time_and_tee_run_by_branch.sh deleted file mode 100755 index 989c9da71..000000000 --- a/src/gms/time_and_tee_run_by_branch.sh +++ /dev/null @@ -1,29 +0,0 @@ -#!/bin/bash -e - -echo -echo "================================================================================" - -/usr/bin/time -v $srcDir/gms/run_by_branch.sh $1 $2 |& tee $outputRunDataDir/logs/branch/$1_gms_branch_$2.log - -#exit ${PIPESTATUS[0]} -return_codes=( "${PIPESTATUS[@]}" ) - -# we do this way instead of working directly with stderr and stdout -# as they were messing with output logs which we always want. -for code in "${return_codes[@]}" -do - # Make an extra copy of the branch log in a new folder - # Note: It was tricky to load in the fim_enum into bash, so we will just - # go with the code for now - if [ $code -eq 61 ]; then - echo - echo "***** Branch has no valid flowlines *****" - elif [ $code -ne 0 ]; then - echo - echo "***** An error has occured *****" - cp $outputRunDataDir/logs/branch/$1_gms_branch_$2.log $outputRunDataDir/branch_errors - fi -done - -echo "================================================================================" -exit diff --git a/src/gms/time_and_tee_run_by_unit.sh b/src/gms/time_and_tee_run_by_unit.sh deleted file mode 100755 index bae50e1ae..000000000 --- a/src/gms/time_and_tee_run_by_unit.sh +++ /dev/null @@ -1,29 +0,0 @@ -#!/bin/bash -e - -echo -echo "================================================================================" - -/usr/bin/time -v $srcDir/gms/run_by_unit.sh $1 |& tee $outputRunDataDir/logs/unit/$1_gms_unit.log - -#exit ${PIPESTATUS[0]} -return_codes=( "${PIPESTATUS[@]}" ) - -# we do this way instead of working directly with stderr and stdout -# as they were messing with output logs which we always want. 
-for code in "${return_codes[@]}" -do - # Make an extra copy of the branch log in a new folder if an error - # Note: It was tricky to load in the fim_enum into bash, so we will just - # go with the code for now - if [ $code -eq 60 ]; then - echo - echo "***** Unit has no valid branches *****" - elif [ $code -ne 0 ]; then - echo - echo "***** An error has occured *****" - cp $outputRunDataDir/logs/unit/$1_gms_unit.log $outputRunDataDir/unit_errors - fi -done - -echo "================================================================================" -exit diff --git a/src/gms/make_rem.py b/src/make_rem.py similarity index 100% rename from src/gms/make_rem.py rename to src/make_rem.py diff --git a/src/gms/mask_dem.py b/src/mask_dem.py similarity index 100% rename from src/gms/mask_dem.py rename to src/mask_dem.py diff --git a/src/output_cleanup.py b/src/output_cleanup.py deleted file mode 100755 index 34b83672a..000000000 --- a/src/output_cleanup.py +++ /dev/null @@ -1,117 +0,0 @@ -#!/usr/bin/env python3 -import os -import argparse -from utils.shared_functions import mem_profile - -################################## -## -#### Deprecated (as part of fim_run): Oct 1, 2022 #### -## -################################## - - -@mem_profile -def output_cleanup(huc_number, output_folder_path, additional_whitelist, is_production, is_viz_post_processing): - ''' - Processes all the final output files to cleanup and add post-processing - - Parameters - ---------- - huc_number : STR - The HUC - output_folder_path : STR - Path to the outputs for the specific huc - additional_whitelist : STR - Additional list of files to keep during a production run - is_production : BOOL - Determine whether or not to only keep whitelisted production files - is_viz_post_processing : BOOL - Determine whether or not to process outputs for Viz - ''' - - ################################## - ## - #### Deprecated (as part of fim_run): Oct 1, 2022 #### - print('#### Deprecated (as part of fim_run): Oct 1, 2022 ####') - ## - ################################## - - - # List of files that will be saved during a production run - production_whitelist = [ - 'rem_zeroed_masked.tif', - 'gw_catchments_reaches_filtered_addedAttributes_crosswalked.gpkg', - 'demDerived_reaches_split_filtered_addedAttributes_crosswalked.gpkg', - 'gw_catchments_reaches_filtered_addedAttributes.tif', - 'hydroTable.csv', - 'src.json', - 'small_segments.csv', - 'bathy_crosswalk_calcs.csv', - 'bathy_stream_order_calcs.csv', - 'bathy_thalweg_flag.csv', - 'bathy_xs_area_hydroid_lookup.csv', - 'src_full_crosswalked.csv', - 'usgs_elev_table.csv', - 'hand_ref_elev_table.csv', - ] - - # List of files that will be saved during a viz run - viz_whitelist = [ - 'rem_zeroed_masked.tif', - 'gw_catchments_reaches_filtered_addedAttributes_crosswalked.gpkg', - 'demDerived_reaches_split_filtered_addedAttributes_crosswalked.gpkg', - 'gw_catchments_reaches_filtered_addedAttributes.tif', - 'hydroTable.csv', - 'src.json', - 'small_segments.csv', - 'src_full_crosswalked.csv', - 'demDerived_reaches_split_points.gpkg', - 'flowdir_d8_burned_filled.tif', - 'dem_thalwegCond.tif' - ] - - # If "production" run, only keep whitelisted files - if is_production and not is_viz_post_processing: - whitelist_directory(output_folder_path, production_whitelist, additional_whitelist) - - # If Viz post-processing is enabled, form output files to Viz specifications - if is_viz_post_processing: - # Step 1, keep only files that Viz needs - whitelist_directory(output_folder_path, viz_whitelist, 
additional_whitelist) - - -@mem_profile -def whitelist_directory(directory_path, whitelist, additional_whitelist): - # Add any additional files to the whitelist that the user wanted to keep - if additional_whitelist: - whitelist = whitelist + additional_whitelist - - # Delete any non-whitelisted files - directory = os.fsencode(directory_path) - for file in os.listdir(directory_path): - filename = os.fsdecode(file) - if filename not in whitelist: - os.remove(os.path.join(directory_path, filename)) - - -if __name__ == '__main__': - #Parse arguments - parser = argparse.ArgumentParser(description = 'Cleanup output files') - parser.add_argument('huc_number', type=str, help='The HUC') - parser.add_argument('output_folder_path', type=str, help='Path to the outputs for the specific huc') - parser.add_argument('-w', '--additional_whitelist', type=str, help='List of additional files to keep in a production run',default=None,nargs="+") - parser.add_argument('-p', '--is_production', help='Keep only white-listed files for production runs', action='store_true') - parser.add_argument('-v', '--is_viz_post_processing', help='Formats output files to be useful for Viz', action='store_true') - - # Extract to dictionary and assign to variables. - args = vars(parser.parse_args()) - - # Rename variable inputs - huc_number = args['huc_number'] - output_folder_path = args['output_folder_path'] - additional_whitelist = args['additional_whitelist'] - is_production = args['is_production'] - is_viz_post_processing = args['is_viz_post_processing'] - - # Run output_cleanup - output_cleanup(huc_number, output_folder_path, additional_whitelist, is_production, is_viz_post_processing) diff --git a/src/gms/outputs_cleanup.py b/src/outputs_cleanup.py similarity index 100% rename from src/gms/outputs_cleanup.py rename to src/outputs_cleanup.py diff --git a/src/process_branch.sh b/src/process_branch.sh index 627cfa182..bd1d4d90f 100755 --- a/src/process_branch.sh +++ b/src/process_branch.sh @@ -1,13 +1,13 @@ #!/bin/bash -e -# it is strongly recommended that you do not call directly to src/gms/run_by_branch.sh +# it is strongly recommended that you do not call directly to src/run_by_branch.sh # but call this file and let is call run_by_branch. # This file will auto trap any exceptions from run_by_branch. # also.. remember.. that this file will rarely need to be called (but can be) # as it is usually called through a parallelizing iterator in run_unit_wb.sh -# this also has no named command line arguments, onlly positional args. +# this also has no named command line arguments, only positional args. 
runName=$1 hucNumber=$2 @@ -18,7 +18,7 @@ export outputRunDataDir=$outputDataDir/$runName branchLogFileName=$outputRunDataDir/logs/branch/"$hucNumber"_branch_"$branchId".log branch_list_csv_file=$outputRunDataDir/$hucNumber/branch_ids.csv -/usr/bin/time -v $srcDir/gms/run_by_branch.sh $hucNumber $branchId 2>&1 | tee $branchLogFileName +/usr/bin/time -v $srcDir/run_by_branch.sh $hucNumber $branchId 2>&1 | tee $branchLogFileName #exit ${PIPESTATUS[0]} return_codes=( "${PIPESTATUS[@]}" ) diff --git a/src/gms/query_vectors_by_branch_polygons.py b/src/query_vectors_by_branch_polygons.py similarity index 100% rename from src/gms/query_vectors_by_branch_polygons.py rename to src/query_vectors_by_branch_polygons.py diff --git a/src/gms/reset_mannings.py b/src/reset_mannings.py similarity index 99% rename from src/gms/reset_mannings.py rename to src/reset_mannings.py index df911bbb9..888e156e3 100755 --- a/src/gms/reset_mannings.py +++ b/src/reset_mannings.py @@ -5,7 +5,7 @@ import argparse import os from glob import iglob,glob -from gms.stream_branches import StreamNetwork +from stream_branches import StreamNetwork def Reset_mannings(hydrofabric_dir,mannings_value,overwrite_files=False): diff --git a/src/gms/run_by_branch.sh b/src/run_by_branch.sh similarity index 89% rename from src/gms/run_by_branch.sh rename to src/run_by_branch.sh index 2c141d282..3bd8f8658 100755 --- a/src/gms/run_by_branch.sh +++ b/src/run_by_branch.sh @@ -52,7 +52,7 @@ Tcount echo -e $startDiv"Clipping rasters to branches $hucNumber $current_branch_id" date -u Tstart -$srcDir/gms/clip_rasters_to_branches.py -d $current_branch_id -b $outputHucDataDir/branch_polygons.gpkg -i $branch_id_attribute -r $outputBranchDataDir/$branch_zero_id/dem_meters_$branch_zero_id.tif $outputBranchDataDir/$branch_zero_id/flowdir_d8_burned_filled_$branch_zero_id.tif -c $outputCurrentBranchDataDir/dem_meters.tif $outputCurrentBranchDataDir/flowdir_d8_burned_filled.tif -v +$srcDir/clip_rasters_to_branches.py -d $current_branch_id -b $outputHucDataDir/branch_polygons.gpkg -i $branch_id_attribute -r $outputBranchDataDir/$branch_zero_id/dem_meters_$branch_zero_id.tif $outputBranchDataDir/$branch_zero_id/flowdir_d8_burned_filled_$branch_zero_id.tif -c $outputCurrentBranchDataDir/dem_meters.tif $outputCurrentBranchDataDir/flowdir_d8_burned_filled.tif -v Tcount ## GET RASTER METADATA @@ -88,7 +88,7 @@ export xmax=$xmax export ymax=$ymax export ncols=$ncols export nrows=$nrows -$srcDir/gms/delineate_hydros_and_produce_HAND.sh "branch" +$srcDir/delineate_hydros_and_produce_HAND.sh "branch" ## USGS CROSSWALK ## if [ -f $outputHucDataDir/usgs_subset_gages.gpkg ]; then @@ -104,7 +104,7 @@ if [ -f $deny_branches_list ]; then echo -e $startDiv"Remove files $hucNumber $current_branch_id" date -u Tstart - $srcDir/gms/outputs_cleanup.py -d $outputCurrentBranchDataDir -l $deny_branches_list -b $current_branch_id + $srcDir/outputs_cleanup.py -d $outputCurrentBranchDataDir -l $deny_branches_list -b $current_branch_id Tcount fi diff --git a/src/run_unit_wb.sh b/src/run_unit_wb.sh index e1516d9df..e7d5e2d16 100755 --- a/src/run_unit_wb.sh +++ b/src/run_unit_wb.sh @@ -85,7 +85,7 @@ Tcount echo -e $startDiv"Generating Level Paths for $hucNumber" date -u Tstart -$srcDir/gms/derive_level_paths.py -i $outputHucDataDir/nwm_subset_streams.gpkg -b $branch_id_attribute -r "ID" -o $outputHucDataDir/nwm_subset_streams_levelPaths.gpkg -d $outputHucDataDir/nwm_subset_streams_levelPaths_dissolved.gpkg -e $outputHucDataDir/nwm_headwaters.gpkg -c 
$outputHucDataDir/nwm_catchments_proj_subset.gpkg -t $outputHucDataDir/nwm_catchments_proj_subset_levelPaths.gpkg -n $outputHucDataDir/nwm_subset_streams_levelPaths_dissolved_headwaters.gpkg -w $outputHucDataDir/nwm_lakes_proj_subset.gpkg +$srcDir/derive_level_paths.py -i $outputHucDataDir/nwm_subset_streams.gpkg -b $branch_id_attribute -r "ID" -o $outputHucDataDir/nwm_subset_streams_levelPaths.gpkg -d $outputHucDataDir/nwm_subset_streams_levelPaths_dissolved.gpkg -e $outputHucDataDir/nwm_headwaters.gpkg -c $outputHucDataDir/nwm_catchments_proj_subset.gpkg -t $outputHucDataDir/nwm_catchments_proj_subset_levelPaths.gpkg -n $outputHucDataDir/nwm_subset_streams_levelPaths_dissolved_headwaters.gpkg -w $outputHucDataDir/nwm_lakes_proj_subset.gpkg # test if we received a non-zero code back from derive_level_paths.py subscript_exit_code=$? @@ -97,7 +97,7 @@ Tcount echo -e $startDiv"Generating Stream Branch Polygons for $hucNumber" date -u Tstart -$srcDir/gms/buffer_stream_branches.py -a $input_DEM_domain -s $outputHucDataDir/nwm_subset_streams_levelPaths_dissolved.gpkg -i $branch_id_attribute -d $branch_buffer_distance_meters -b $outputHucDataDir/branch_polygons.gpkg +$srcDir/buffer_stream_branches.py -a $input_DEM_domain -s $outputHucDataDir/nwm_subset_streams_levelPaths_dissolved.gpkg -i $branch_id_attribute -d $branch_buffer_distance_meters -b $outputHucDataDir/branch_polygons.gpkg Tcount ## CREATE BRANCHID LIST FILE @@ -198,7 +198,7 @@ export ncols=$ncols export nrows=$nrows ## PRODUCE BRANCH ZERO HAND -$srcDir/gms/delineate_hydros_and_produce_HAND.sh "unit" +$srcDir/delineate_hydros_and_produce_HAND.sh "unit" ## CREATE USGS GAGES FILE if [ -f $outputHucDataDir/nwm_subset_streams_levelPaths.gpkg ]; then @@ -220,7 +220,7 @@ fi ## CLEANUP BRANCH ZERO OUTPUTS ## echo -e $startDiv"Cleaning up outputs in branch zero $hucNumber" -$srcDir/gms/outputs_cleanup.py -d $outputCurrentBranchDataDir -l $deny_branch_zero_list -b $branch_zero_id +$srcDir/outputs_cleanup.py -d $outputCurrentBranchDataDir -l $deny_branch_zero_list -b $branch_zero_id ## REMOVE FILES FROM DENY LIST ## @@ -228,7 +228,7 @@ if [ -f $deny_unit_list ]; then echo -e $startDiv"Remove files $hucNumber" date -u Tstart - $srcDir/gms/outputs_cleanup.py -d $outputHucDataDir -l $deny_unit_list -b $hucNumber + $srcDir/outputs_cleanup.py -d $outputHucDataDir -l $deny_unit_list -b $hucNumber Tcount fi @@ -249,11 +249,11 @@ parallel --eta --timeout $branch_timeout -j $jobBranchLimit --joblog $branchSumm if [ "$has_deny_branch_zero_override" == "1" ] then echo -e $startDiv"Second cleanup of files for branch zero (none default)" - $srcDir/gms/outputs_cleanup.py -d $outputHucDataDir -l $deny_branch_zero_list -b 0 + $srcDir/outputs_cleanup.py -d $outputHucDataDir -l $deny_branch_zero_list -b 0 else echo -e $startDiv"Second cleanup of files for branch zero using the default branch deny list" - $srcDir/gms/outputs_cleanup.py -d $outputHucDataDir -l $deny_branches_list -b 0 + $srcDir/outputs_cleanup.py -d $outputHucDataDir -l $deny_branches_list -b 0 fi echo "---- All huc for $hucNumber branches have been now processed" diff --git a/src/gms/stream_branches.py b/src/stream_branches.py similarity index 100% rename from src/gms/stream_branches.py rename to src/stream_branches.py diff --git a/src/gms/subset_catch_list_by_branch_id.py b/src/subset_catch_list_by_branch_id.py similarity index 100% rename from src/gms/subset_catch_list_by_branch_id.py rename to src/subset_catch_list_by_branch_id.py diff --git a/src/gms/toDo.md b/src/toDo.md similarity index 
100% rename from src/gms/toDo.md rename to src/toDo.md diff --git a/src/usgs_gage_aggregate.py b/src/usgs_gage_aggregate.py index 24d6d26c4..1a8993e83 100644 --- a/src/usgs_gage_aggregate.py +++ b/src/usgs_gage_aggregate.py @@ -68,20 +68,20 @@ def agg_function(self): parser = argparse.ArgumentParser(description='Aggregates usgs_elev_table.csv at the HUC level') parser.add_argument('-fim','--fim_directory', help='Input FIM Directory', required=True) - parser.add_argument('-gms','--gms_inputs', help='Input gms_inputs CSV file', required=False) + parser.add_argument('-i','--fim_inputs', help='Input fim_inputs CSV file', required=False) args = vars(parser.parse_args()) fim_directory = args['fim_directory'] - gms_inputs = args['gms_inputs'] + fim_inputs = args['fim_inputs'] assert os.path.isdir(fim_directory), f'{fim_directory} is not a valid directory' - if gms_inputs: - gms_inputs = pd.read_csv(gms_inputs, header=None, names=['huc', 'levpa_id'],dtype=str) + if fim_inputs: + fim_inputs = pd.read_csv(fim_inputs, header=None, names=['huc', 'levpa_id'],dtype=str) - for huc in gms_inputs.huc.unique(): + for huc in fim_inputs.huc.unique(): - branches = gms_inputs.loc[gms_inputs.huc == huc, 'levpa_id'].tolist() + branches = fim_inputs.loc[fim_inputs.huc == huc, 'levpa_id'].tolist() huc = HucDirectory(join(fim_directory, huc), limit_branches=branches) huc.agg_function() diff --git a/src/usgs_gage_unit_setup.py b/src/usgs_gage_unit_setup.py index d3efc61c2..187bb3657 100755 --- a/src/usgs_gage_unit_setup.py +++ b/src/usgs_gage_unit_setup.py @@ -76,24 +76,24 @@ def sjoin_nearest_to_nwm(pnt, lines, union): return int(lines.iloc[queried_index[0]].feature_id.item()) @staticmethod - def filter_gage_branches(gms_inputs_filename): + def filter_gage_branches(fim_inputs_filename): - fim_dir = os.path.dirname(gms_inputs_filename) - gms_inputs = pd.read_csv(gms_inputs_filename, header=None, names=['huc', 'levpa_id'], + fim_dir = os.path.dirname(fim_inputs_filename) + fim_inputs = pd.read_csv(fim_inputs_filename, header=None, names=['huc', 'levpa_id'], dtype={'huc':str, 'levpa_id':str}) for huc_dir in [d for d in os.listdir(fim_dir) if re.search('^\d{8}$', d)]: gage_file = os.path.join(fim_dir, huc_dir, 'usgs_subset_gages.gpkg') if not os.path.isfile(gage_file): - gms_inputs.drop(gms_inputs.loc[gms_inputs.huc == huc_dir].index, inplace=True) + fim_inputs.drop(fim_inputs.loc[fim_inputs.huc == huc_dir].index, inplace=True) continue gages = gpd.read_file(gage_file) level_paths = gages.levpa_id - gms_inputs.drop(gms_inputs.loc[(gms_inputs.huc == huc_dir) & (~gms_inputs.levpa_id.isin(level_paths))].index, inplace=True) + fim_inputs.drop(fim_inputs.loc[(fim_inputs.huc == huc_dir) & (~fim_inputs.levpa_id.isin(level_paths))].index, inplace=True) - gms_inputs.to_csv(gms_inputs_filename, index=False, header=False) + fim_inputs.to_csv(fim_inputs_filename, index=False, header=False) if __name__ == '__main__': @@ -105,7 +105,7 @@ def filter_gage_branches(gms_inputs_filename): parser.add_argument('-o','--output-filename', help='Table to append data', required=True) parser.add_argument('-huc','--huc8-id', help='HUC8 ID (to verify gage location huc)', type=str, required=True) parser.add_argument('-bzero_id','--branch-zero-id', help='Branch zero ID value', type=str, required=True) - parser.add_argument('-ff','--filter-gms-inputs', help='WARNING: only run this parameter if you know exactly what you are doing', required=False) + parser.add_argument('-ff','--filter-fim-inputs', help='WARNING: only run this parameter if you know 
exactly what you are doing', required=False) args = vars(parser.parse_args()) @@ -115,9 +115,9 @@ def filter_gage_branches(gms_inputs_filename): output_filename = args['output_filename'] huc8 = args['huc8_id'] bzero_id = args['branch_zero_id'] - filter_gms_inputs = args['filter_gms_inputs'] + filter_fim_inputs = args['filter_fim_inputs'] - if not filter_gms_inputs: + if not filter_fim_inputs: usgs_gage_subset = Gage2Branch(usgs_gages_filename, nws_lid_filename, huc8) if usgs_gage_subset.gages.empty: @@ -133,13 +133,12 @@ def filter_gage_branches(gms_inputs_filename): else: ''' - This is an easy way to filter gms_inputs so that only branches with gages will run during gms_run_branch.sh. - You can run this option after gms_run_unit.sh has completed. + This is an easy way to filter fim_inputs so that only branches with gages will run during fim_process_unit_wb.sh. example: - python3 src/usgs_gage_unit_setup.py -gages x -ahps x -nwm x -o x -huc x -ff /data/outputs/test_output/gms_inputs.csv + python3 src/usgs_gage_unit_setup.py -gages x -ahps x -nwm x -o x -huc x -ff /data/outputs/test_output/fim_inputs.csv ''' - assert os.path.isfile(filter_gms_inputs) - Gage2Branch.filter_gage_branches(filter_gms_inputs) + assert os.path.isfile(filter_fim_inputs) + Gage2Branch.filter_gage_branches(filter_fim_inputs) diff --git a/src/utils/fim_enums.py b/src/utils/fim_enums.py index 13c4fc0eb..5af4d8451 100644 --- a/src/utils/fim_enums.py +++ b/src/utils/fim_enums.py @@ -18,12 +18,12 @@ class FIM_exit_codes(Enum): - More advanced combinations of codes can be used and we will keep it simple for now. - Sample usage: import utils/fim_enums - print(FIM_exit_codes.GMS_UNIT_NO_BRANCHES.value) -> 60 [this is used in gms/derive_level_paths.py] + print(FIM_exit_codes.UNIT_NO_BRANCHES.value) -> 60 [this is used in derive_level_paths.py] - For more information : https://docs.python.org/3.11/howto/enum.html and https://docs.python.org/3/library/enum.html ''' - GMS_UNIT_NO_BRANCHES = 60 + UNIT_NO_BRANCHES = 60 NO_FLOWLINES_EXIST = 61 EXCESS_UNIT_ERRORS = 62 \ No newline at end of file diff --git a/tools/gms_tools/combine_crosswalk_tables.py b/tools/combine_crosswalk_tables.py similarity index 100% rename from tools/gms_tools/combine_crosswalk_tables.py rename to tools/combine_crosswalk_tables.py diff --git a/tools/gms_tools/compare_ms_and_non_ms_metrics.py b/tools/compare_ms_and_non_ms_metrics.py similarity index 100% rename from tools/gms_tools/compare_ms_and_non_ms_metrics.py rename to tools/compare_ms_and_non_ms_metrics.py diff --git a/tools/gms_tools/compile_comp_stats.py b/tools/compile_comp_stats.py similarity index 81% rename from tools/gms_tools/compile_comp_stats.py rename to tools/compile_comp_stats.py index c5f8a6458..b2bb0fe08 100755 --- a/tools/gms_tools/compile_comp_stats.py +++ b/tools/compile_comp_stats.py @@ -6,6 +6,14 @@ import argparse import os +######################################################## +''' +Feb 15, 2023 - This file may be deprecated. At a minimum, it needs + a significant review and/or upgrade. +''' + +######################################################## + def Compile_comp_stats(hydrofabric_dirs): @@ -67,6 +75,15 @@ def get_log_files(hydrofabric_dirs): if __name__ == '__main__': + ######################################################## + ''' + Feb 15, 2023 - This file may be deprecated. At a minimum, it needs + a significant review and/or upgrade. 
+ ''' + + ######################################################## + + # parse arguments parser = argparse.ArgumentParser(description='Get Comp Stats') parser.add_argument('-y','--hydrofabric_dirs', help='Directory path to FIM hydrofabric by processing unit', required=True,nargs='+') diff --git a/tools/gms_tools/compile_computational_stats.py b/tools/compile_computational_stats.py similarity index 64% rename from tools/gms_tools/compile_computational_stats.py rename to tools/compile_computational_stats.py index f7ab6daed..e9f10cda4 100755 --- a/tools/gms_tools/compile_computational_stats.py +++ b/tools/compile_computational_stats.py @@ -9,6 +9,15 @@ # desired output for branches # dataframe columns: HUC, branch_id, exit status, ,time, ram, +######################################################## +''' +Feb 15, 2023 - This file may be deprecated. At a minimum, it needs + a significant review and/or upgrade. +''' + +######################################################## + + def compile_summary(gms_output_dir,ouput=None): unit_summary = join(gms_output_dir,logs, 'summary_gms_unit.log') @@ -18,9 +27,16 @@ def compile_summary(gms_output_dir,ouput=None): branch_summary = pd.read_csv(branch_summary,sep='\t') +if __name__ == '__main__': + ######################################################## + ''' + Feb 15, 2023 - This file may be deprecated. At a minimum, it needs + a significant review and/or upgrade. + ''' + + ######################################################## -if __name__ == '__main__': parser = argparse.ArgumentParser(description='Create stream network level paths') parser.add_argument('-d','--gms-output-dir', help='Input stream network', required=True) diff --git a/tools/composite_inundation.py b/tools/composite_inundation.py index 3eddb202e..d5784594b 100644 --- a/tools/composite_inundation.py +++ b/tools/composite_inundation.py @@ -12,16 +12,24 @@ from tqdm import tqdm from inundation import inundate -from gms_tools.mosaic_inundation import Mosaic_inundation -from gms_tools.inundate_gms import Inundate_gms +from mosaic_inundation import Mosaic_inundation +from inundate_gms import Inundate_gms from utils.shared_functions import FIM_Helpers as fh from utils.shared_variables import elev_raster_ndv +######################################################## +''' +Feb 15, 2023 - This file may be deprecated. At a minimum, it needs + a significant review and/or upgrade. 
+''' + +######################################################## + + class InundateModel_HUC(object): - def __init__(self, model, source_directory, huc): + def __init__(self, source_directory, huc): - self.model = model self.source_directory = source_directory self.huc = huc @@ -48,13 +56,6 @@ def inundate_huc(self, flows_file, composite_output_dir, output_name, log_file_p if not os.path.isdir(output_huc_dir): os.mkdir(output_huc_dir) - if self.model == "ms": - extent_friendly = "mainstem (MS)" - elif self.model == "fr": - extent_friendly = "full-resolution (FR)" - else: # gms - extent_friendly = "FIM4 GMS" - inundation_map_file = None output_raster_name = os.path.join(output_huc_dir, output_name) @@ -64,12 +65,12 @@ def inundate_huc(self, flows_file, composite_output_dir, output_name, log_file_p log_file = None inundation_list_file = None if (log_file_path != None): - log_file = os.path.join(log_file_path, f"{self.huc}_{self.model}_error_logs.txt") + log_file = os.path.join(log_file_path, f"{self.huc}_error_logs.txt") inundation_list_file = os.path.join(log_file_path, - f"{self.huc}_{self.model}_inundation_file_list.csv") + f"{self.huc}_inundation_file_list.csv") if (verbose): - print(f'... Creating an inundation map for the {extent_friendly}'\ + print(f'... Creating an inundation map for the FIM4'\ f' configuration for HUC {self.huc}...') if self.model in ["fr", "ms"]: @@ -150,7 +151,7 @@ def composite_huc(self, args): composite_model_map_files = [] for model in args["models"]: - # setup original fim/gms processed directory + # setup original fim processed directory if model == "ms" : source_dir = args["fim_dir_ms"] elif model == "fr" : source_dir = args["fim_dir_fr"] else: source_dir = args["gms_dir"] diff --git a/tools/consolidate_metrics.py b/tools/consolidate_metrics.py index d9a80b9f2..0b470631d 100755 --- a/tools/consolidate_metrics.py +++ b/tools/consolidate_metrics.py @@ -20,6 +20,15 @@ pd.set_option('display.max_rows', None) pd.set_option('display.max_columns', None) + +######################################################## +''' +Feb 15, 2023 - This file may be deprecated. At a minimum, it needs + a significant review and/or upgrade. +''' + +######################################################## + def Consolidate_metrics( benchmarks=['all'],versions=['all'], zones=['total_area'],matching_hucs_only=True, metrics_output_csv=None, @@ -309,6 +318,14 @@ def parse_eval_metadata(file_names,metadata_field): if __name__ == '__main__': + ######################################################## + ''' + Feb 15, 2023 - This file may be deprecated. At a minimum, it needs + a significant review and/or upgrade. + ''' + + ######################################################## + # Parse arguments. 
parser = argparse.ArgumentParser(description='Caches metrics from previous versions of HAND.') parser.add_argument('-b','--benchmarks',help='Allowed benchmarks', required=False, default='all', nargs="+") diff --git a/tools/copy_test_case_folders.py b/tools/copy_test_case_folders.py index 1c05b4428..bd2a1540b 100644 --- a/tools/copy_test_case_folders.py +++ b/tools/copy_test_case_folders.py @@ -6,13 +6,13 @@ import sys # importing python folders in other direcories -sys.path.append('/foss_fim/src/gms/') +sys.path.append('/foss_fim/src/') import aggregate_branch_lists as agg def copy_folders(folder_name_list, source_dir, target_dir, - create_gms_input_list=False, + create_fim_input_list=False, overwrite=False): ''' @@ -29,9 +29,9 @@ def copy_folders(folder_name_list, - target_dir: The root folder where the huc folders will be copied to. Note. All contents of each huc folder, including branch folders if applicable, will be copied, in the extact structure as the source directory. Note: The target folder need not pre-exist. - - create_gms_input_list: If this flag is set to True, after coping the folders, the - "aggregate_branch_lists.py" file will be called in order to make the gms_input.csv file. - The gms_input.csv is required for futher processing such as reprocessing branchs or set up + - create_fim_input_list: If this flag is set to True, after copying the folders, the + "aggregate_branch_lists.py" file will be called in order to make the fim_input.csv file. + The fim_input.csv is required for further processing such as reprocessing branches or set up for test cases. - overwrite: if this value is set to true, the entire target_directory will be emptied of its contents as this process starts if the folder exists. @@ -73,11 +73,11 @@ def copy_folders(folder_name_list, print(f"{str(ctr)} folders have been copied to {target_dir}") - if create_gms_input_list == True: + if create_fim_input_list == True: # call this code, which scans each huc (unit) directory looking for the branch_id.lst - # and adds them together to create the gms_inputs.csv file + # and adds them together to create the fim_inputs.csv file # Note: folder_name_list needs to be a huc list to work) - agg.aggregate_inputs_for_gms(folder_name_list, target_dir, "gms_inputs.csv") + agg.aggregate_branch_lists(folder_name_list, target_dir, "gms_inputs.csv") print("gms_inputs.csv created") @@ -87,7 +87,7 @@ # and it has to be run on each root folder, one at a time (for now. aka.. no wildcards) # Sample Usage: -#python /foss_fim/tools/copy_test_case_folders.py -f /data/inputs/huc_lists/huc_list_for_alpha_tests_22020420.lst -s /outputs/gms_test_synth/ -t /data/outputs/gms_test_synth_combined -a +#python /foss_fim/tools/copy_test_case_folders.py -f /data/inputs/huc_lists/huc_list_for_alpha_tests_22020420.lst -s /outputs/copy_test_synth/ -t /data/outputs/copy_test_synth_combined -a # NOTE the 'a' at the end meaning go ahead create the gms_input.csv. This is normally # left for the last folder to be copied over.
diff --git a/tools/eval_plots.py b/tools/eval_plots.py index d0b041f99..256342015 100644 --- a/tools/eval_plots.py +++ b/tools/eval_plots.py @@ -25,7 +25,9 @@ ######################################################################### #Create boxplot ######################################################################### -def boxplot(dataframe, x_field, x_order, y_field, hue_field, ordered_hue, title_text, fim_configuration, textbox_str = False, simplify_legend = False, dest_file = False): +def boxplot(dataframe, x_field, x_order, y_field, hue_field, ordered_hue, + title_text, fim_configuration, textbox_str = False, + simplify_legend = False, dest_file = False): ''' Create boxplots. @@ -192,7 +194,9 @@ def scatterplot(dataframe, x_field, y_field, title_text, stats_text=False, annot ######################################################################### #Create barplot ######################################################################### -def barplot(dataframe, x_field, x_order, y_field, hue_field, ordered_hue, title_text, fim_configuration, textbox_str = False, simplify_legend = False, display_values = False, dest_file = False): +def barplot(dataframe, x_field, x_order, y_field, hue_field, ordered_hue, + title_text, fim_configuration, textbox_str = False, + simplify_legend = False, display_values = False, dest_file = False): ''' Create barplots. @@ -344,7 +348,9 @@ def filter_dataframe(dataframe, unique_field): ############################################################################## #Main function to analyze metric csv. ############################################################################## -def eval_plots(metrics_csv, workspace, versions = [], stats = ['CSI','FAR','TPR','PND','MCC','EQUITABLE_THREAT_SCORE'] , spatial = False, fim_1_ms = False, site_barplots = False): +def eval_plots(metrics_csv, workspace, versions = [], + stats = ['CSI','FAR','TPR','PND','MCC','EQUITABLE_THREAT_SCORE'] , spatial = False, + fim_1_ms = False, site_barplots = False): ''' Creates plots and summary statistics using metrics compiled from diff --git a/tools/gms_tools/evaluate_continuity.py b/tools/evaluate_continuity.py similarity index 100% rename from tools/gms_tools/evaluate_continuity.py rename to tools/evaluate_continuity.py diff --git a/tools/gms_tools/find_max_catchment_breadth.py b/tools/find_max_catchment_breadth.py similarity index 100% rename from tools/gms_tools/find_max_catchment_breadth.py rename to tools/find_max_catchment_breadth.py diff --git a/tools/generate_categorical_fim_mapping.py b/tools/generate_categorical_fim_mapping.py index 743552664..db9328dbf 100755 --- a/tools/generate_categorical_fim_mapping.py +++ b/tools/generate_categorical_fim_mapping.py @@ -14,8 +14,8 @@ sys.path.append('/foss_fim/src') from utils.shared_variables import PREP_PROJECTION,VIZ_PROJECTION from utils.shared_functions import getDriver -from gms_tools.mosaic_inundation import Mosaic_inundation -from gms_tools.inundate_gms import Inundate_gms +from mosaic_inundation import Mosaic_inundation +from inundate_gms import Inundate_gms def generate_categorical_fim(fim_run_dir, source_flow_dir, output_catfim_dir, diff --git a/tools/gms_tools/__init__.py b/tools/gms_tools/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/tools/gms_tools/inundate_gms.py b/tools/inundate_gms.py similarity index 94% rename from tools/gms_tools/inundate_gms.py rename to tools/inundate_gms.py index 7b481dd70..e1a90fa67 100755 --- a/tools/gms_tools/inundate_gms.py +++ b/tools/inundate_gms.py @@ -43,8 
+43,8 @@ def Inundate_gms( hydrofabric_dir, forecast, num_workers = 1, #logging.basicConfig(filename=log_file, level=logging.INFO) #logging.info('HUC8,BranchID,Exception') - # load gms inputs - hucs_branches = pd.read_csv( os.path.join(hydrofabric_dir,'gms_inputs.csv'), + # load fim inputs + hucs_branches = pd.read_csv( os.path.join(hydrofabric_dir,'fim_inputs.csv'), header=None, dtype= {0:str,1:str} ) @@ -164,18 +164,18 @@ def __inundate_gms_generator( hucs_branches, huc = str(row[0]) branch_id = str(row[1]) - gms_dir = os.path.join(hydrofabric_dir, huc, 'branches') + huc_dir = os.path.join(hydrofabric_dir, huc, 'branches') rem_file_name = 'rem_zeroed_masked_{}.tif'.format(branch_id) - rem_branch = os.path.join( gms_dir, branch_id, rem_file_name ) + rem_branch = os.path.join( huc_dir, branch_id, rem_file_name ) catchments_file_name = f'gw_catchments_reaches_filtered_addedAttributes_{branch_id}.tif' - catchments_branch = os.path.join( gms_dir, branch_id, catchments_file_name ) + catchments_branch = os.path.join( huc_dir, branch_id, catchments_file_name ) - hydroTable_branch = os.path.join( gms_dir,branch_id, 'hydroTable_{}.csv'.format(branch_id) ) + hydroTable_branch = os.path.join( huc_dir, branch_id, 'hydroTable_{}.csv'.format(branch_id) ) xwalked_file_name = f'gw_catchments_reaches_filtered_addedAttributes_crosswalked_{branch_id}.gpkg' - catchment_poly = os.path.join( gms_dir, branch_id, xwalked_file_name ) + catchment_poly = os.path.join( huc_dir, branch_id, xwalked_file_name ) # branch output @@ -224,7 +224,7 @@ def __inundate_gms_generator( hucs_branches, if __name__ == '__main__': # parse arguments - parser = argparse.ArgumentParser(description='Inundate GMS') + parser = argparse.ArgumentParser(description='Inundate FIM') parser.add_argument('-y','--hydrofabric_dir', help='Directory path to FIM hydrofabric by processing unit', required=True) parser.add_argument('-u','--hucs',help='List of HUCS to run',required=False,default=None,type=str,nargs='+') parser.add_argument('-f','--forecast',help='Forecast discharges in CMS as CSV file',required=True) diff --git a/tools/inundate_nation.py b/tools/inundate_nation.py index 74a7c1bb2..a07f9c9d9 100644 --- a/tools/inundate_nation.py +++ b/tools/inundate_nation.py @@ -13,8 +13,8 @@ sys.path.append('/foss_fim/src') from datetime import datetime -from gms_tools.mosaic_inundation import Mosaic_inundation -from gms_tools.inundate_gms import Inundate_gms +from mosaic_inundation import Mosaic_inundation +from inundate_gms import Inundate_gms from inundation import inundate from utils.shared_variables import elev_raster_ndv, PREP_PROJECTION from utils.shared_functions import FIM_Helpers as fh @@ -27,7 +27,8 @@ #DEFAULT_OUTPUT_DIR = '/data/inundation_review/inundate_nation/mosaic_output/' -def inundate_nation(fim_run_dir, output_dir, magnitude_key, flow_file, inc_mosaic, job_number): +def inundate_nation(fim_run_dir, output_dir, magnitude_key, + flow_file, inc_mosaic, job_number): assert os.path.isdir(fim_run_dir), f'ERROR: could not find the input fim_dir location: {fim_run_dir}' @@ -74,10 +75,9 @@ def inundate_nation(fim_run_dir, output_dir, magnitude_key, flow_file, inc_mosai huc_list.append(huc) print('Inundation raw mosaic outputs here: ' + magnitude_output_dir) - - config = "GMS" - - run_inundation([fim_run_dir, huc_list, magnitude_key, magnitude_output_dir, config, flow_file, job_number]) + + run_inundation([fim_run_dir, huc_list, magnitude_key, + magnitude_output_dir, flow_file, job_number]) # Perform mosaic operation if inc_mosaic: @@ -129,7 
+129,7 @@ def run_inundation(args): This script is a wrapper for the inundate function and is designed for multiprocessing. Args: - args (list): [fim_run_dir (str), huc_list (list), magnitude (str), magnitude_output_dir (str), config (str), forecast (str), job_number (int)] + args (list): [fim_run_dir (str), huc_list (list), magnitude (str), magnitude_output_dir (str), forecast (str), job_number (int)] """ @@ -137,39 +137,38 @@ def run_inundation(args): huc_list = args[1] magnitude = args[2] magnitude_output_dir = args[3] - config = args[4] - forecast = args[5] - job_number = args[6] + forecast = args[4] + job_number = args[5] # Define file paths for use in inundate(). - inundation_raster = os.path.join(magnitude_output_dir, magnitude + '_' + config + '_inund_extent.tif') + inundation_raster = os.path.join(magnitude_output_dir, magnitude + '_inund_extent.tif') print("Running the NWM recurrence intervals for HUC inundation (extent) for magnitude: " + str(magnitude)) map_file = Inundate_gms( hydrofabric_dir = fim_run_dir, - forecast = forecast, - num_workers = job_number, - hucs = huc_list, - inundation_raster = inundation_raster, - inundation_polygon = None, - depths_raster = None, - verbose = True, - log_file = None, - output_fileNames = None ) + forecast = forecast, + num_workers = job_number, + hucs = huc_list, + inundation_raster = inundation_raster, + inundation_polygon = None, + depths_raster = None, + verbose = True, + log_file = None, + output_fileNames = None ) Mosaic_inundation( map_file, - mosaic_attribute = 'inundation_rasters', - mosaic_output = inundation_raster, - #mask = os.path.join(fim_run_dir,huc8,'wbd.gpkg'), - mask = None, - unit_attribute_name = 'huc8', - nodata = elev_raster_ndv, - workers = 1, - remove_inputs = True, - subset = None, - verbose = True, - is_mosaic_for_gms_branches = True ) + mosaic_attribute = 'inundation_rasters', + mosaic_output = inundation_raster, + #mask = os.path.join(fim_run_dir,huc8,'wbd.gpkg'), + mask = None, + unit_attribute_name = 'huc8', + nodata = elev_raster_ndv, + workers = 1, + remove_inputs = True, + subset = None, + verbose = True, + is_mosaic_for_branches = True ) def create_bool_rasters(args): in_raster_dir = args[0] diff --git a/tools/inundation.py b/tools/inundation.py index e2e947d08..188f0bce0 100755 --- a/tools/inundation.py +++ b/tools/inundation.py @@ -270,10 +270,8 @@ def __inundate_in_huc(rem_array,catchments_array,crs,window_transform,rem_profil if out_vector_profile is None: out_vector_profile = {'crs' : crs , 'driver' : 'GPKG'} - out_vector_profile['schema'] = { - 'geometry' : 'Polygon', - 'properties' : OrderedDict([('HydroID' , 'int')]) - } + out_vector_profile['schema'] = { 'geometry' : 'Polygon', + 'properties' : OrderedDict([('HydroID' , 'int')]) } # open output inundation polygons if isinstance(inundation_polygon,str): diff --git a/tools/make_boxes_from_bounds.py b/tools/make_boxes_from_bounds.py index b9ea31d45..69b9b5a8b 100755 --- a/tools/make_boxes_from_bounds.py +++ b/tools/make_boxes_from_bounds.py @@ -9,7 +9,9 @@ from foss_fim.src.utils.shared_functions import getDriver -def find_hucs_of_bounding_boxes(bounding_boxes_file,wbd=None,projection_of_boxes='EPSG:4329',wbd_layer='WBDHU8',huc_output_file=None,forecast_output_file=None,bounding_boxes_outfile=None): +def find_hucs_of_bounding_boxes(bounding_boxes_file, wbd=None, projection_of_boxes='EPSG:4329', + wbd_layer='WBDHU8', huc_output_file=None, + forecast_output_file=None, bounding_boxes_outfile=None): # load bounding box file diff --git 
a/tools/gms_tools/mosaic_inundation.py b/tools/mosaic_inundation.py similarity index 94% rename from tools/gms_tools/mosaic_inundation.py rename to tools/mosaic_inundation.py index ff6165a6c..9a267a8fa 100755 --- a/tools/gms_tools/mosaic_inundation.py +++ b/tools/mosaic_inundation.py @@ -7,7 +7,7 @@ import sys from glob import glob -from gms_tools.overlapping_inundation import OverlapWindowMerge +from overlapping_inundation import OverlapWindowMerge from tqdm import tqdm from utils.shared_variables import elev_raster_ndv from utils.shared_functions import FIM_Helpers as fh @@ -22,7 +22,7 @@ def Mosaic_inundation( map_file, remove_inputs = False, subset = None, verbose = True, - is_mosaic_for_gms_branches = False ): + is_mosaic_for_branches = False ): # Notes: # - If is_mosaic_for_gms_branches is true, the mosaic output name @@ -79,7 +79,7 @@ def Mosaic_inundation( map_file, # fr set with a gms composite map. ag_mosaic_output = mosaic_output - if (is_mosaic_for_gms_branches) and (ag not in mosaic_output): + if (is_mosaic_for_branches) and (ag not in mosaic_output): ag_mosaic_output = fh.append_id_to_file_name(mosaic_output, ag) # change it mosaic_by_unit(inundation_maps_list, @@ -156,8 +156,8 @@ def mosaic_by_unit(inundation_maps_list, required=False, default=False, action='store_true') parser.add_argument('-v','--verbose', help='Remove original input inundation Maps', required=False, default=False, action='store_true') - parser.add_argument('-g','--is-mosaic-for-gms-branches', - help='If the mosaic is for gms branchs, include this arg', + parser.add_argument('-g','--is-mosaic-for-branches', + help='If the mosaic is for branches, include this arg', required=False, default=False, action='store_true') args = vars(parser.parse_args()) diff --git a/tools/gms_tools/overlapping_inundation.py b/tools/overlapping_inundation.py similarity index 100% rename from tools/gms_tools/overlapping_inundation.py rename to tools/overlapping_inundation.py diff --git a/tools/gms_tools/plots.py b/tools/plots.py similarity index 100% rename from tools/gms_tools/plots.py rename to tools/plots.py diff --git a/tools/run_test_case.py b/tools/run_test_case.py index 9d46ca582..c1e8c58a2 100755 --- a/tools/run_test_case.py +++ b/tools/run_test_case.py @@ -4,9 +4,9 @@ import pandas as pd from tools_shared_variables import TEST_CASES_DIR, INPUTS_DIR, PREVIOUS_FIM_DIR, OUTPUTS_DIR, AHPS_BENCHMARK_CATEGORIES, MAGNITUDE_DICT, elev_raster_ndv -from inundation import inundate -from gms_tools.mosaic_inundation import Mosaic_inundation -from gms_tools.inundate_gms import Inundate_gms +# from inundation import inundate +from mosaic_inundation import Mosaic_inundation +from inundate_gms import Inundate_gms from tools_shared_functions import compute_contingency_stats_from_rasters from utils.shared_functions import FIM_Helpers as fh @@ -144,16 +144,14 @@ def list_all_test_cases(cls, version, archive, benchmark_categories=[]): return test_case_list - def alpha_test(self, calibrated=False, model='', mask_type='huc', inclusion_area='', - inclusion_area_buffer=0, overwrite=True, verbose=False, gms_workers=1): + def alpha_test(self, calibrated=False, mask_type='huc', inclusion_area='', + inclusion_area_buffer=0, overwrite=True, verbose=False, num_workers_inundate=1): '''Compares a FIM directory with benchmark data from a variety of sources. Parameters ---------- calibrated : bool Whether or not this FIM version is calibrated. - model : str - MS or FR extent of the model. This value will be written to the eval_metadata.json.
mask_type : str Mask type to feed into inundation.py. inclusion_area : int @@ -164,7 +162,7 @@ If True, overwites pre-existing test cases within the test_cases directory. verbose : bool If True, prints out all pertinent data. - gms_workers : int + num_workers_inundate : int Number of worker processes assigned to GMS processing. ''' @@ -178,6 +176,7 @@ self.stats_modes_list = ['total_area'] # Create paths to fim_run outputs for use in inundate() + ''' if model != 'GMS': self.rem = os.path.join(self.fim_dir, 'rem_zeroed_masked.tif') if not os.path.exists(self.rem): @@ -191,15 +190,16 @@ else: self.catchment_poly = os.path.join(self.fim_dir, 'gw_catchments_reaches_filtered_addedAttributes_crosswalked.gpkg') self.hydro_table = os.path.join(self.fim_dir, 'hydroTable.csv') - + ''' + # Map necessary inputs for inundate(). self.hucs, self.hucs_layerName = os.path.join(INPUTS_DIR, 'wbd', 'WBD_National.gpkg'), 'WBDHU8' if inclusion_area != '': inclusion_area_name = os.path.split(inclusion_area)[1].split('.')[0] # Get layer name self.mask_dict.update({inclusion_area_name: {'path': inclusion_area, - 'buffer': int(inclusion_area_buffer), - 'operation': 'include'}}) + 'buffer': int(inclusion_area_buffer), + 'operation': 'include'}}) # Append the concatenated inclusion_area_name and buffer. if inclusion_area_buffer == None: inclusion_area_buffer = 0 @@ -215,14 +215,14 @@ for magnitude in validation_data: for instance in validation_data[magnitude]: # instance will be the lid for AHPS sites and '' for other sites # For each site, inundate the REM and compute aggreement raster with stats - self._inundate_and_compute(magnitude, instance, model=model, verbose=verbose, gms_workers=gms_workers) + self._inundate_and_compute(magnitude, instance, verbose=verbose, num_workers_inundate=num_workers_inundate) # Clean up 'total_area' outputs from AHPS sites if self.is_ahps: self.clean_ahps_outputs(os.path.join(self.dir, magnitude)) # Write out evaluation meta-data - self.write_metadata(calibrated, model) + self.write_metadata(calibrated) except KeyboardInterrupt: print("Program aborted via keyboard interrupt") @@ -235,12 +235,10 @@ def _inundate_and_compute(self, magnitude, lid, compute_only = False, - model = '', verbose = False, - gms_workers = 1): + num_workers_inundate = 1): '''Method for inundating and computing contingency rasters as part of the alpha_test. - Used by both the alpha_test() and composite() methods.
- + Parameters ---------- magnitude : str @@ -283,30 +281,31 @@ def _inundate_and_compute(self, # Inundate REM if not compute_only: # composite alpha tests don't need to be inundated - if model == 'GMS': - fh.vprint("Begin GMS Inundation", verbose) - map_file = Inundate_gms( hydrofabric_dir = os.path.dirname(self.fim_dir), - forecast = benchmark_flows, - num_workers = gms_workers, - hucs = self.huc, - inundation_raster = predicted_raster_path, - inundation_polygon = None, - depths_raster = None, - verbose = verbose, - log_file = None, - output_fileNames = None ) - #if (len(map_file) > 0): - fh.vprint("Begin GMS Mosaic", verbose) - Mosaic_inundation( map_file, - mosaic_attribute = 'inundation_rasters', - mosaic_output = predicted_raster_path, - mask = os.path.join(self.fim_dir,'wbd.gpkg'), - unit_attribute_name = 'huc8', - nodata = elev_raster_ndv, - workers = 1, - remove_inputs = True, - subset = None, - verbose = verbose ) + #if model == 'GMS': + fh.vprint("Begin FIM Inundation", verbose) + map_file = Inundate_gms( hydrofabric_dir = os.path.dirname(self.fim_dir), + forecast = benchmark_flows, + num_workers = num_workers_inundate, + hucs = self.huc, + inundation_raster = predicted_raster_path, + inundation_polygon = None, + depths_raster = None, + verbose = verbose, + log_file = None, + output_fileNames = None ) + #if (len(map_file) > 0): + fh.vprint("Begin FIM Mosaic", verbose) + Mosaic_inundation( map_file, + mosaic_attribute = 'inundation_rasters', + mosaic_output = predicted_raster_path, + mask = os.path.join(self.fim_dir,'wbd.gpkg'), + unit_attribute_name = 'huc8', + nodata = elev_raster_ndv, + workers = 1, + remove_inputs = True, + subset = None, + verbose = verbose ) + ''' # FIM v3 and before else: fh.vprint("Begin FIM v3 (or earlier) Inundation", verbose) @@ -318,7 +317,8 @@ def _inundate_and_compute(self, quiet=True) if inundate_result != 0: return inundate_result - + ''' + # Create contingency rasters and stats fh.vprint("Begin creating contingency rasters and stats", verbose) if os.path.isfile(predicted_raster_path): @@ -335,15 +335,17 @@ def _inundate_and_compute(self, @classmethod - def run_alpha_test(cls, version, test_id, magnitude, calibrated, model, archive_results=False, - mask_type='huc', inclusion_area='', inclusion_area_buffer=0, light_run=False, overwrite=True, verbose=False, gms_workers=1): + def run_alpha_test(cls, version, test_id, magnitude, calibrated, archive_results=False, + mask_type='huc', inclusion_area='', inclusion_area_buffer=0, light_run=False, + overwrite=True, verbose=False, num_workers_inundate=1): + '''Class method for instantiating the test_case class and running alpha_test directly''' alpha_class = cls(test_id, version, archive_results) - alpha_class.alpha_test(calibrated, model, mask_type, inclusion_area, - inclusion_area_buffer, overwrite, verbose, gms_workers) + alpha_class.alpha_test(calibrated, mask_type, inclusion_area, + inclusion_area_buffer, overwrite, verbose, num_workers_inundate) - def composite(self, version_2, calibrated = False, overwrite = True, verbose = False): + #def composite(self, version_2, calibrated = False, overwrite = True, verbose = False): '''Class method for compositing MS and FR inundation and creating an agreement raster with stats Parameters @@ -355,7 +357,7 @@ def composite(self, version_2, calibrated = False, overwrite = True, verbose = F overwrite : bool If True, overwites pre-existing test cases within the test_cases directory. 
''' - + ''' if re.match(r'(.*)(_ms|_fr)', self.version): composite_version_name = re.sub(r'(.*)(_ms|_fr)', r'\1_comp', self.version, count=1) else: @@ -412,15 +414,17 @@ def composite(self, version_2, calibrated = False, overwrite = True, verbose = F composite_test_case.clean_ahps_outputs(os.path.join(composite_test_case.dir, magnitude)) composite_test_case.write_metadata(calibrated, 'COMP') + ''' - def write_metadata(self, calibrated, model): + def write_metadata(self, calibrated ): + '''Writes metadata files for a test_case directory.''' with open(os.path.join(self.dir,'eval_metadata.json'),'w') as meta: - eval_meta = { 'calibrated' : calibrated , 'model' : model } - meta.write( - json.dumps(eval_meta,indent=2) - ) + eval_meta = { 'calibrated' : calibrated , 'model' : 'FIM4' } + meta.write( json.dumps(eval_meta,indent=2) ) + def clean_ahps_outputs(self, magnitude_directory): + '''Cleans up `total_area` files from an input AHPS magnitude directory.''' output_file_list = [os.path.join(magnitude_directory, of) for of in os.listdir(magnitude_directory)] for output_file in output_file_list: diff --git a/tools/synthesize_test_cases.py b/tools/synthesize_test_cases.py index 6182cc105..06af105ea 100755 --- a/tools/synthesize_test_cases.py +++ b/tools/synthesize_test_cases.py @@ -108,6 +108,7 @@ def create_master_metrics_csv(master_metrics_csv_output, dev_versions_to_include for magnitude in magnitude_list: for version in versions_to_aggregate: + # this may be pulling in older fim3 versions (so keep the _ms, _fr and _c) if '_ms' in version: extent_config = 'MS' elif ('_fr' in version) or (version == 'fim_2_3_3'): @@ -166,6 +167,7 @@ def create_master_metrics_csv(master_metrics_csv_output, dev_versions_to_include for magnitude in ['action', 'minor', 'moderate', 'major']: for version in versions_to_aggregate: + # this may be pulling in older fim3 versions (so keep the _ms, _fr and _c) if '_ms' in version: extent_config = 'MS' elif ('_fr' in version) or (version == 'fim_2_3_3'): @@ -233,56 +235,40 @@ def progress_bar_handler(executor_dict, verbose, desc): # Sample usage: ''' === FOR GMS - python /foss_fim/tools/synthesize_test_cases.py -c DEV -e GMS -v gms_test_synth_combined -jh 2 -jb 40 -m /outputs/gms_test_synth_combined/gms_synth_metrics.csv -vg -o + python /foss_fim/tools/synthesize_test_cases.py -c DEV -v some_runtime_folder_name -jh 3 -ji 25 -m /outputs/some_runtime_folder_name/some_runtime_folder_name_metrics.csv -o Notes: - - gms_input.csv MUST be in the folder suggested. + - fim_input.csv MUST be in the folder suggested. - the -v param is the name in the folder in the "outputs/" directory where the test hucs are at. It also becomes the folder names inside the test_case folders when done. - the -vg param may not be working (will be assessed better on later releases). - Find a balance between -jh (number of jobs for hucs) versus -jb (number of jobs for branches) on quick tests on a 96 core machine, we tried [1 @ 80], [2 @ 40], and [3 @ 25] (and others). - -jb 3 -jh 25 was noticably better. You can likely go more jb cores with better success, just + -jb 3 -ji 25 was noticably better. You can likely go more jb cores with better success, just experiment. Start times, End Times and duration are now included. - The -m can be any path and any name. To see your outputs in the test_case folder (hard coded path), you can check for outputs using - (cd .... to your test_case folder), then command becomes find . -name gms_test_* -type d (Notice the + (cd .... 
to your test_case folder), then command becomes find . -name some_runtime_folder_name* -type d (Notice the the -name can be a wildcard for your -v param (or the whole -v value)) + If you want to delete the test outputs, test the outputs as suggest immediately above, but this time your - command becomes: find . -name gms_test_* -type d -exec rm -rdf {} + - ''' - ''' - === FOR FIM - python /foss_fim/tools/synthesize_test_cases.py -c DEV -e MS -v dev_fim_3_0_29_1_ms -jh 4 -m /outputs/dev_fim_3_0_29_1_ms/alpha/alpha_master_metrics_fim_3_0_29_1_ms_src_adjust.csv -vg -o - - Notes: - - the -v param is the name in the folder in the "outputs/" directory where the test hucs are at. - It also becomes the folder names inside the test_case folders when done. - - the -vg param may not be working (will be assessed better on later releases). - - The -m can be any path and any name. - - To see your outputs in the test_case folder (hard coded path), you can check for outputs using - (cd .... to your test_case folder), then command becomes find . -name dev_fim_3_0_29_1_* -type d (Notice the - the -name can be a wildcard for your -v param (or the whole -v value)) - If you want to delete the test outputs, test the outputs as suggest immediately above, but this time your - command becomes: find . -name dev_fim_3_0_29_1_* -type d -exec rm -rdf {} + + command becomes: find . -name some_runtime_folder_name* -type d -exec rm -rdf {} + ''' # Parse arguments. parser = argparse.ArgumentParser(description='Caches metrics from previous versions of HAND.') parser.add_argument('-c','--config',help='Save outputs to development_versions or previous_versions? Options: "DEV" or "PREV"',required=False,default='DEV') parser.add_argument('-l','--calibrated',help='Denotes use of calibrated n values. This should be taken from meta-data from hydrofabric dir',required=False, default=False,action='store_true') - parser.add_argument('-e','--model',help='Denotes model used. FR, MS, or GMS allowed. This should be taken from meta-data in hydrofabric dir.',required=True) parser.add_argument('-v','--fim-version',help='Name of fim version to cache.',required=False, default="all") parser.add_argument('-jh','--job-number-huc',help='Number of processes to use for HUC scale operations. HUC and Batch job numbers should multiply to no more than one less than the CPU count of the machine.',required=False, default=1,type=int) - parser.add_argument('-jb','--job-number-branch',help='Number of processes to use for Branch scale operations. HUC and Batch job numbers should multiply to no more than one less than the CPU count of the machine.',required=False, default=1,type=int) + #parser.add_argument('-jb','--job-number-branch',help='Number of processes to use for Branch scale operations. HUC and Batch job numbers should multiply to no more than one less than the CPU count of the machine.',required=False, default=1,type=int) + parser.add_argument('-jb','--job-number-inundate',help='Number of processes to use for inundating. HUC and inundate job numbers should multiply to no more than one less than the CPU count of the machine.',required=False, default=1,type=int) parser.add_argument('-s','--special-string',help='Add a special name to the end of the branch.',required=False, default="") parser.add_argument('-b','--benchmark-category',help='A benchmark category to specify. 
Defaults to process all categories.',required=False, default="all") parser.add_argument('-o','--overwrite',help='Overwrite all metrics or only fill in missing metrics.',required=False, action="store_true") parser.add_argument('-dc', '--dev-version-to-compare', nargs='+', help='Specify the name(s) of a dev (testing) version to include in master metrics CSV. Pass a space-delimited list.',required=False) parser.add_argument('-m','--master-metrics-csv',help='Define path for master metrics CSV file.',required=False,default=None) - parser.add_argument('-d','--fr-run-dir',help='Name of test case directory containing FIM for FR model',required=False,default=None) parser.add_argument('-vr','--verbose',help='Verbose',required=False,default=None,action='store_true') parser.add_argument('-vg','--gms-verbose',help='GMS Verbose Progress Bar',required=False,default=None,action='store_true') @@ -291,15 +277,13 @@ def progress_bar_handler(executor_dict, verbose, desc): config = args['config'] fim_version = args['fim_version'] job_number_huc = args['job_number_huc'] - job_number_branch = args['job_number_branch'] + job_number_inundate = args['job_number_inundate'] special_string = args['special_string'] benchmark_category = args['benchmark_category'] overwrite = args['overwrite'] dev_versions_to_compare = args['dev_version_to_compare'] master_metrics_csv = args['master_metrics_csv'] - fr_run_dir = args['fr_run_dir'] calibrated = args['calibrated'] - model = args['model'] verbose = bool(args['verbose']) gms_verbose = bool(args['gms_verbose']) @@ -311,13 +295,13 @@ def progress_bar_handler(executor_dict, verbose, desc): print() # check job numbers - total_cpus_requested = job_number_huc * job_number_branch - total_cpus_available = os.cpu_count() - 1 + total_cpus_requested = job_number_huc * job_number_inundate + total_cpus_available = os.cpu_count() - 2 if total_cpus_requested > total_cpus_available: - raise ValueError('The HUC job number, {}, multiplied by the branch job number, {}, '\ - 'exceeds your machine\'s available CPU count minus one. '\ - 'Please lower the job_number_huc or job_number_branch'\ - 'values accordingly.'.format(job_number_huc,job_number_branch) + raise ValueError('The HUC job number, {}, multiplied by the inundate job number, {}, '\ + 'exceeds your machine\'s available CPU count minus two. '\ + 'Please lower the -j (job_number_huc) or job_number_inundate'\ + 'values accordingly.'.format(job_number_huc, job_number_inundate) ) # Default to processing all possible versions in PREVIOUS_FIM_DIR. Otherwise, process only the user-supplied version. 
@@ -355,11 +339,10 @@ def progress_bar_handler(executor_dict, verbose, desc): alpha_test_args = { 'calibrated': calibrated, - 'model': model, 'mask_type': 'huc', 'overwrite': overwrite, - 'verbose':gms_verbose if model == 'GMS' else verbose, - 'gms_workers': job_number_branch + 'verbose':gms_verbose, + 'num_workers_inundate': job_number_inundate } try: @@ -371,62 +354,9 @@ def progress_bar_handler(executor_dict, verbose, desc): sys.exit(1) # Send the executor to the progress bar and wait for all MS tasks to finish - progress_bar_handler(executor_dict, True, f"Running {model} alpha test cases with {job_number_huc} workers") + progress_bar_handler(executor_dict, True, f"Running alpha test cases with {job_number_huc} workers") #wait(executor_dict.keys()) - ## Composite alpha test run is initiated by a MS `model` and providing a `fr_run_dir` - if model == 'MS' and fr_run_dir: - - ## Rebuild all test cases list with the FR version, loop through them and apply the alpha test - all_test_cases = test_case.list_all_test_cases(version = fr_run_dir, archive = archive_results, - benchmark_categories=[] if benchmark_category == "all" else [benchmark_category]) - - with ProcessPoolExecutor(max_workers=job_number_huc) as executor: - executor_dict = {} - for test_case_class in all_test_cases: - if not os.path.exists(test_case_class.fim_dir): - continue - alpha_test_args = { - 'calibrated': calibrated, - 'model': model, - 'mask_type': 'huc', - 'verbose':verbose, - 'overwrite': overwrite - } - try: - future = executor.submit(test_case_class.alpha_test, **alpha_test_args) - executor_dict[future] = test_case_class.test_id - except Exception as ex: - print(f"*** {ex}") - traceback.print_exc() - sys.exit(1) - - # Send the executor to the progress bar and wait for all FR tasks to finish - progress_bar_handler(executor_dict, True, f"Running FR test cases with {job_number_huc} workers") - #wait(executor_dict.keys()) - - # Loop through FR test cases, build composite arguments, and submit the composite method to the process pool - with ProcessPoolExecutor(max_workers=job_number_huc) as executor: - executor_dict = {} - for test_case_class in all_test_cases: - composite_args = { - 'version_2': fim_version, # this is the MS version name since `all_test_cases` are FR - 'calibrated': calibrated, - 'overwrite': overwrite, - 'verbose': verbose - } - - try: - future = executor.submit(test_case_class.alpha_test, **alpha_test_args) - executor_dict[future] = test_case_class.test_id - except Exception as ex: - print(f"*** {ex}") - traceback.print_exc() - sys.exit(1) - - # Send the executor to the progress bar - progress_bar_handler(executor_dict, verbose, f"Compositing test cases with {job_number_huc} workers") - if dev_versions_to_compare != None: dev_versions_to_include_list = dev_versions_to_compare + previous_fim_list else: diff --git a/unit_tests/README.md b/unit_tests/README.md index 037a22f7a..b7f37d1d3 100644 --- a/unit_tests/README.md +++ b/unit_tests/README.md @@ -33,20 +33,20 @@ Start a docker container as you normally would for any development. ie) docker r - ie) docker run --rm -it --name mytest -v /home/abcd/projects/dev/innudation-mapping/:/foss_fim -v /abcd_share/foss_fim/outputs/:/outputs -v /abcs_share/foss_fim/:/data fim_4:dev_20220208_8eba0ee For unit tests to work, you need to run the following (if not already in place). 
-Notice a modified branch "deny_gms_branch_unittests.lst" (special for unittests) +Notice a modified branch "deny_branch_unittests.lst" (special for unittests) Here are the params and args you need if you need to re-run unit and branch -gms_pipeline.sh -n fim_unit_test_data_do_not_remove -u "02020005 02030201 05030104" -bd /foss_fim/config/deny_gms_branch_unittests.lst -ud None -j 1 -o +fim_pipeline.sh -n fim_unit_test_data_do_not_remove -u "02020005 02030201 05030104" -bd /foss_fim/config/deny_branch_unittests.lst -ud None -j 1 -o -**NOTICE: the deny file used for gms_run_branch... its a special one for unittests `deny_gms_branch_unittests.lst`. +**NOTICE: the deny file used for fim_pipeline... it's a special one for unittests `deny_branch_unittests.lst`. If you need to run inundation tests, fun the following: -python3 foss_fim/tools/synthesize_test_cases.py -c DEV -e GMS -v fim_unit_test_data_do_not_remove -jh 1 -jb 1 -m /outputs/fim_unit_test_data_do_not_remove/alpha_test_metrics.csv -o +python3 foss_fim/tools/synthesize_test_cases.py -c DEV -v fim_unit_test_data_do_not_remove -jh 1 -jb 1 -m /outputs/fim_unit_test_data_do_not_remove/alpha_test_metrics.csv -o If you want to test just one unit test, here is an example: -At the root terminal window, run: python ./foss_fim/unit_tests/gms/derive_level_paths_unittests.py or python ./foss_fim/unit_tests/clip_vectors_to_wbd_unittests.py +At the root terminal window, run: python ./foss_fim/unit_tests/derive_level_paths_unittests.py or python ./foss_fim/unit_tests/clip_vectors_to_wbd_unittests.py (replace with your own script and path name) ## Key Notes for creating new unit tests @@ -68,7 +68,7 @@ At the root terminal window, run: python ./foss_fim/unit_tests/gms/derive_level 9) One py file = one "{original py file name}_unittests.py" file. -10) Sometimes you may want to run a full successful "happy path" version through gms_run_by_unit.sh (or similar), to get all of the files you need in place to do your testing. However.. you will want to ensure that none of the outputs are being deleted during the test. One way to solve this is to put in an invalid value for the "-d" parameter (denylist). ie) Normally: gms_run_unit.sh -n fim_unit_test_data_do_not_remove -u 05030104 -c /foss_fim/config/params_template.env -j 1 -d /foss_fim/config/deny_gms_unit_default.lst -o, but ours would be +10) Sometimes you may want to run a full successful "happy path" version through fim_pipeline.sh (or similar), to get all of the files you need in place to do your testing. However.. you will want to ensure that none of the outputs are being deleted during the test. One way to solve this is to put in an invalid value for the "-d" parameter (denylist). ie) Normally: fim_pipeline.sh -n fim_unit_test_data_do_not_remove -u 05030104 -c /foss_fim/config/params_template.env -j 1 -d /foss_fim/config/deny_unit_default.lst -o, but ours would be
-An example is in unit_tests/gms/Derive_level_paths_unittests.py -> test_Derive_level_paths_invalid_input_stream_network (function). It is incomplete but give you the pattern. +An example is in unit_tests/Derive_level_paths_unittests.py -> test_Derive_level_paths_invalid_input_stream_network (function). It is incomplete but give you the pattern. We have almost no "assert"s yet, but most unit test usually have one or more "assert" test. See https://docs.python.org/3/library/unittest.html for more details. ## Unit tests currently available -python3 /foss_fim/unit_tests/gms/derive_level_paths_unittests.py +python3 /foss_fim/unit_tests/derive_level_paths_unittests.py python3 /foss_fim/unit_tests/tools/inundate_unittests.py -python3 /foss_fim/unit_tests/tools/gms_tools/inundate_gms_unittests.py +python3 /foss_fim/unit_tests/tools/inundate_gms_unittests.py python3 /foss_fim/unit_tests/clip_vectors_to_wbd_unittests.py python3 /foss_fim/unit_tests/filter_catchments_and_add_attributes_unittests.py python3 /foss_fim/unit_tests/rating_curve_comparison_unittests.py diff --git a/unit_tests/__template_unittests.py b/unit_tests/__template_unittests.py index 44e960e59..1ba2981ce 100644 --- a/unit_tests/__template_unittests.py +++ b/unit_tests/__template_unittests.py @@ -53,7 +53,7 @@ def test__success(self): # for now we are happy if no exceptions are thrown. - # < See the readme.md, clip_vectors_to_wbd_unittests.py or gms/derive_level_paths_unittests.py + # < See the readme.md, clip_vectors_to_wbd_unittests.py or derive_level_paths_unittests.py # for examples.> # Replace this stub example with your own. # Try to use the same order to make it easier. diff --git a/unit_tests/check_unit_errors_params.json b/unit_tests/check_unit_errors_params.json index d97d43084..bf89913a1 100644 --- a/unit_tests/check_unit_errors_params.json +++ b/unit_tests/check_unit_errors_params.json @@ -1,11 +1,11 @@ { "valid_data": { - "fim_dir": "/outputs/gms_example_unit_tests" + "fim_dir": "/outputs/check_errors_example_unit_tests" }, "invalid_path": { - "fim_dir": "/outputs/gms_example_unit_tests_not_not_valid" + "fim_dir": "/outputs/check_errors_example_unit_tests_not_not_valid" } } \ No newline at end of file diff --git a/unit_tests/check_unit_errors_unittests.py b/unit_tests/check_unit_errors_unittests.py index 92fc10640..04ebdfda7 100644 --- a/unit_tests/check_unit_errors_unittests.py +++ b/unit_tests/check_unit_errors_unittests.py @@ -211,7 +211,7 @@ def __create_temp_unit_errors_folder_files(self, output_folder, number_of_files) dummy files. A dummy file for non_zero_exit_codes.log will also be created. Input: - output_folder: the root output folder (ie. /outputs/gms_example_unit_tests/) + output_folder: the root output folder (ie. /outputs/check_errors_example_unit_tests/) number_of_files: how many dummy files to create Returns: True if the 'unit_errors' folder did original exist and needs to be renamed back. @@ -249,7 +249,7 @@ def __remove_temp_unit_errors_folder(self, output_folder): Note.. it is possible the temp folder does not exist, but we don't need to error out on it. Sometimes we got here by a try/catch cleanup Input: - output_folder: the root output folder (ie. /outputs/gms_example_unit_tests/) + output_folder: the root output folder (ie. 
/outputs/check_errors_example_unit_tests/) Returns: nothing ''' diff --git a/unit_tests/gms/derive_level_paths_params.json b/unit_tests/derive_level_paths_params.json similarity index 100% rename from unit_tests/gms/derive_level_paths_params.json rename to unit_tests/derive_level_paths_params.json diff --git a/unit_tests/gms/derive_level_paths_unittests.py b/unit_tests/derive_level_paths_unittests.py similarity index 98% rename from unit_tests/gms/derive_level_paths_unittests.py rename to unit_tests/derive_level_paths_unittests.py index 7027aa51c..5f68a72d8 100644 --- a/unit_tests/gms/derive_level_paths_unittests.py +++ b/unit_tests/derive_level_paths_unittests.py @@ -13,7 +13,7 @@ from unit_tests_utils import FIM_unit_test_helpers as ut_helpers # importing python folders in other direcories -sys.path.append('/foss_fim/src/gms/') +sys.path.append('/foss_fim/src/') import derive_level_paths as src import stream_branches from utils.fim_enums import FIM_exit_codes as fec @@ -118,7 +118,7 @@ def test_Derive_level_paths_success_drop_low_stream_orders_no_branches_left(self reach_id_attribute = params["reach_id_attribute"], verbose = params["verbose"]) - self.assertEqual(se.exception.code, fec.GMS_UNIT_NO_BRANCHES.value) + self.assertEqual(se.exception.code, fec.UNIT_NO_BRANCHES.value) print(f"Test Success: {inspect.currentframe().f_code.co_name}") print("*************************************************************") diff --git a/unit_tests/filter_catchments_and_add_attributes_unittests.py b/unit_tests/filter_catchments_and_add_attributes_unittests.py index 2243f7edc..f838593ba 100644 --- a/unit_tests/filter_catchments_and_add_attributes_unittests.py +++ b/unit_tests/filter_catchments_and_add_attributes_unittests.py @@ -20,8 +20,8 @@ # ************* -# Important: For this to work, when you run gms_run_branch.sh, you have to -# use deny_gms_branches_dev.lst or the word "none" for the deny list arguments +# Important: For this to work, when you run fim_pipeline.sh, you have to +# use deny_branches_dev.lst or the word "none" for the deny list arguments # (unit and branch deny list parameters). Key files need to exist for this unit test to work. 
class test_filter_catchments_and_add_attributes(unittest.TestCase): diff --git a/unit_tests/gms/outputs_cleanup_params.json b/unit_tests/outputs_cleanup_params.json similarity index 80% rename from unit_tests/gms/outputs_cleanup_params.json rename to unit_tests/outputs_cleanup_params.json index 456f03463..a545e6753 100644 --- a/unit_tests/gms/outputs_cleanup_params.json +++ b/unit_tests/outputs_cleanup_params.json @@ -2,7 +2,7 @@ "valid_specific_branch_data": { "src_dir": "/data/outputs/fim_unit_test_data_do_not_remove/02020005/branches/3246000009", - "deny_list": "/foss_fim/config/deny_gms_branches.lst", + "deny_list": "/foss_fim/config/deny_branches.lst", "branch_id": "3246000009", "verbose": true }, @@ -10,7 +10,7 @@ "valid_directory_data": { "src_dir": "/data/outputs/fim_unit_test_data_do_not_remove", - "deny_list": "/foss_fim/config/deny_gms_branches.lst", + "deny_list": "/foss_fim/config/deny_branches.lst", "branch_id": "0", "verbose": true }, diff --git a/unit_tests/gms/outputs_cleanup_unittests.py b/unit_tests/outputs_cleanup_unittests.py similarity index 99% rename from unit_tests/gms/outputs_cleanup_unittests.py rename to unit_tests/outputs_cleanup_unittests.py index 006c95ab0..3d241f681 100644 --- a/unit_tests/gms/outputs_cleanup_unittests.py +++ b/unit_tests/outputs_cleanup_unittests.py @@ -13,7 +13,7 @@ # importing python folders in other directories sys.path.append('/foss_fim/src/') -import gms.outputs_cleanup as src +import outputs_cleanup as src # NOTE: This goes directly to the function. # Ultimately, it should emulate going through command line (not import -> direct function call) diff --git a/unit_tests/split_flows_unittests.py b/unit_tests/split_flows_unittests.py index e517f078a..451ec15cd 100644 --- a/unit_tests/split_flows_unittests.py +++ b/unit_tests/split_flows_unittests.py @@ -36,8 +36,8 @@ def setUpClass(self): def test_split_flows_success(self): ''' - The /data/outputs/gms_example_unit_tests//branches//demDerived_reaches_split_.gpkg and - /data/outputs/gms_example_unit_tests//branches//demDerived_reaches_split_points_.gpkg should not exit prior to this test. + The /data/outputs/example_unit_tests//branches//demDerived_reaches_split_.gpkg and + /data/outputs/example_unit_tests//branches//demDerived_reaches_split_points_.gpkg should not exit prior to this test. If the test is successful, these file will be created. ''' diff --git a/unit_tests/tools/gms_tools/inundate_gms_params.json b/unit_tests/tools/inundate_gms_params.json similarity index 100% rename from unit_tests/tools/gms_tools/inundate_gms_params.json rename to unit_tests/tools/inundate_gms_params.json diff --git a/unit_tests/tools/gms_tools/inundate_gms_unittests.py b/unit_tests/tools/inundate_gms_unittests.py similarity index 98% rename from unit_tests/tools/gms_tools/inundate_gms_unittests.py rename to unit_tests/tools/inundate_gms_unittests.py index 54c742f93..ade2bfa30 100644 --- a/unit_tests/tools/gms_tools/inundate_gms_unittests.py +++ b/unit_tests/tools/inundate_gms_unittests.py @@ -11,7 +11,7 @@ sys.path.append('/foss_fim/unit_tests/') from unit_tests_utils import FIM_unit_test_helpers as ut_helpers -sys.path.append('/foss_fim/tools/gms_tools') +sys.path.append('/foss_fim/tools') import inundate_gms as src # NOTE: This goes directly to the function. 
From 167fdd7c4fd8e6473c114dd4526a9c6a87564957 Mon Sep 17 00:00:00 2001
From: Rob Hanna - NOAA <90854818+RobHanna-NOAA@users.noreply.github.com>
Date: Thu, 16 Feb 2023 14:59:03 -0600
Subject: [PATCH 02/11] Dropped in error

---
 src/generate_branch_list.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/generate_branch_list.py b/src/generate_branch_list.py
index b25dab876..2529a223e 100755
--- a/src/generate_branch_list.py
+++ b/src/generate_branch_list.py
@@ -5,7 +5,6 @@
import pandas as pd
import sys
-#sys.path.append('/foss_fim/src')
from stream_branches import StreamNetwork
def generate_branch_list(stream_network_dissolved, branch_id_attribute,

From 35a7fe6834f34b0b160ea645e8ce5ab8977dcf25 Mon Sep 17 00:00:00 2001
From: Rob Hanna
Date: Thu, 16 Feb 2023 21:47:11 +0000
Subject: [PATCH 03/11] minor text correction

---
 tools/synthesize_test_cases.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/tools/synthesize_test_cases.py b/tools/synthesize_test_cases.py
index 06af105ea..bc133e8fc 100755
--- a/tools/synthesize_test_cases.py
+++ b/tools/synthesize_test_cases.py
@@ -234,8 +234,7 @@ def progress_bar_handler(executor_dict, verbose, desc):
# Sample usage:
'''
- === FOR GMS
- python /foss_fim/tools/synthesize_test_cases.py -c DEV -v some_runtime_folder_name -jh 3 -ji 25 -m /outputs/some_runtime_folder_name/some_runtime_folder_name_metrics.csv -o
+ python /foss_fim/tools/synthesize_test_cases.py -c DEV -v some_runtime_folder_name -jh 3 -jb 25 -m /outputs/some_runtime_folder_name/some_runtime_folder_name_metrics.csv -o
Notes:
- fim_input.csv MUST be in the folder suggested.
@@ -244,7 +243,7 @@ def progress_bar_handler(executor_dict, verbose, desc):
- the -vg param may not be working (will be assessed better on later releases).
- Find a balance between -jh (number of jobs for hucs) versus -jb (number of jobs for branches)
on quick tests on a 96 core machine, we tried [1 @ 80], [2 @ 40], and [3 @ 25] (and others).
- -jb 3 -ji 25 was noticably better. You can likely go more jb cores with better success, just
+ -jb 3 -jb 25 was noticably better. You can likely go more jb cores with better success, just
experiment. Start times, End Times and duration are now included.
- The -m can be any path and any name.

From d29de2130d5f91f47eb868b21fd393c28d19b99d Mon Sep 17 00:00:00 2001
From: Rob Hanna
Date: Thu, 16 Feb 2023 22:03:22 +0000
Subject: [PATCH 04/11] fix bug and added deprecation warning

---
 tools/copy_test_case_folders.py | 20 +++++++++++---------
 1 file changed, 11 insertions(+), 9 deletions(-)

diff --git a/tools/copy_test_case_folders.py b/tools/copy_test_case_folders.py
index bd2a1540b..4e601e224 100644
--- a/tools/copy_test_case_folders.py
+++ b/tools/copy_test_case_folders.py
@@ -9,11 +9,15 @@
sys.path.append('/foss_fim/src/')
import aggregate_branch_lists as agg
+#######
+# Feb 16, 2023: This file is likely now deprecated. If it is used again, the aggregate_branch_lists file will need to
+# be reconciled as a newer one exists in the src directory with a similar name.
+#######
+
def copy_folders(folder_name_list, source_dir, target_dir,
- create_fim_input_list=False,
- overwrite=False):
+ create_fim_input_list=False):
'''
Summary: Scans the source_directory looking for huc values from the huc list. Once found,
@@ -33,8 +37,6 @@ def copy_folders(folder_name_list,
"aggregate_branch_lists.py" file will be called in order to make the fim_input.csv file.
The fim_input.csv is required for further processing such as reprocessing branches or set up for test cases.
- - overwrite: if this value is set to true, the entire target_directory will be emptied of its
- contents as this process starts if the folder exists.
Output:
- A copy of huc directories (or named folders) as listed in the folder_name_list.
'''
@@ -77,8 +79,8 @@ def copy_folders(folder_name_list,
# call this code, which scans each huc (unit) directory looking for the branch_id.lst
# and adds them together to create the fim_inputs.csv file
# Note: folder_name_list needs to be a huc list to work)
- agg.aggregate_branch_lists(folder_name_list, target_dir, "gms_inputs.csv")
- print("gms_inputs.csv created")
+ agg.aggregate_branch_lists(folder_name_list, target_dir, "fim_inputs.csv")
+ print("fim_inputs.csv created")
if __name__ == '__main__':
@@ -87,7 +89,7 @@ def copy_folders(folder_name_list,
# and it has to be run on each root folder, one at a time (for now. aka.. no wildcards)
# Sample Usage:
-#python /foss_fim/tools/copy_test_case_folders.py -f /data/inputs/huc_lists/huc_list_for_alpha_tests_22020420.lst -s /outputs/copy_test_synth/ -t /data/outputs/copy_test_synth_combined -a
+#python /foss_fim/tools/copy_test_case_folders.py -f /data/inputs/huc_lists/huc_list_for_alpha_tests_20220420.lst -s /outputs/copy_test_synth/ -t /data/outputs/copy_test_synth_combined -a
# NOTE the 'a' at the end meaning go ahead create the gms_input.csv. This is normally
# left for the last folder to be copied over.
@@ -106,8 +108,8 @@ def copy_folders(folder_name_list,
help='Target folder where the folders will be copied to',
required=True)
- parser.add_argument('-a','--create_gms_input_list',
- help='Create the gms_input.csv list after coping',
+ parser.add_argument('-a','--create_fim_input_list',
+ help='Create the fim_input.csv list after copying',
required=False, default=False, action='store_true')
args = vars(parser.parse_args())

From 346c69ce97ad183378f861cde73a7124b81cf864 Mon Sep 17 00:00:00 2001
From: Rob Hanna - NOAA <90854818+RobHanna-NOAA@users.noreply.github.com>
Date: Thu, 16 Feb 2023 16:06:31 -0600
Subject: [PATCH 05/11] Update CHANGELOG.md

---
 docs/CHANGELOG.md | 110 ++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 110 insertions(+)

diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md
index 1845a039b..7d4cacd2a 100644
--- a/docs/CHANGELOG.md
+++ b/docs/CHANGELOG.md
@@ -2,6 +2,116 @@
All notable changes to this project will be documented in this file.
We follow the [Semantic Versioning 2.0.0](http://semver.org/) format.
+## v4.1.(pending) - 2023-02-16 - [PR#816](https://github.com/NOAA-OWP/inundation-mapping/pull/816)
+
+
+This update removes the remaining elements of FIM3 code. It also removes most references to the phrase "GMS", since the entire FIM4 model is GMS. Removing FIM3 likewise means removing the concepts of "MS" and "FR", which are no longer relevant in FIM4. The phrase "GMS" remains in only a few places, namely some inundation files that are being re-evaluated. Some deprecated files and subfolders have also been removed.
+
+Many of the changes share the same explanation, so here is a shortcut system.
+
+- desc 1: Remove or rename values based on the phrases "GMS", "MS" and/or "FR"
+- desc 2: Moved file from the /src/gms folder to /src, or from /tools/gms_tools to /tools
+- desc 3: No longer needed as we now use the `fim_pipeline.sh` processing model.
+ +### Removals + +- `data` + - `acquire_and_preprocess_inputs.py`: No longer needed +- `gms_pipeline.sh` : see desc 3 +- `gms_run_branch.sh` : see desc 3 +- `gms_run_post_processing.sh` : see desc 3 +- `gms_run_unit.sh` : see desc 3 +- `src` + - `gms` + - `init.py` : folder removed, no longer needed. + - `aggregate_branch_lists.py`: no longer needed. Newer version already exists in src directory. + - `remove_error_branches.py` : see desc 3 + - `run_by_unit.sh` : see desc 3 + - `test_new_crosswalk.sh` : no longer needed + - `time_and_tee_run_by_branch.sh` : see desc 3 + - `time_and_tee_run_by_unit.sh` : see desc 3 + - `output_cleanup.py` : see desc 3 + - `tools/gms_tools` + - `init.py` : folder removed, no longer needed. + +### Changes + +- `config` + - `deny_branch_unittests.lst` : renamed from `deny_gms_branch_unittests.lst` + - `deny_branch_zero.lst` : renamed from `deny_gms_branch_zero.lst` + - `deny_branches.lst` : renamed from `deny_gms_branches.lst` + - `deny_unit.lst` : renamed from `deny_gms_unit.lst` + - `params_template.env` : see desc 1 + +- `data` + - `nws` + - `preprocess_ahps_nws.py`: Added deprecation note: If reused, it needs review and/or upgrades. + - `acquire_and_preprocess_3dep_dems.py` : see desc 1 + - `fim_post_processing.sh` : see desc 1, plus a small pathing change. + - `fim_pre_processing.sh` : see desc 1 + - ` src` + - `add_crosswalk.py` : see desc 1. Also cleaned up some formatting and commented out a code block in favor of a better way to pass args from "__main__" + - `bash_variables.env` : see desc 1 + - `buffer_stream_branches.py` : see desc 2 + - `clip_rasters_to_branches.py` : see desc 2 + - `crosswalk_nwm_demDerived.py` : see desc 1 and desc 2 + - `delineate_hydros_and_produce_HAND.sh` : see desc 1 and desc 2 + - `derive_level_paths.py` : see desc 1 and desc 2 + - `edit_points.py` : see desc 2 + - `filter_inputs_by_huc.py`: see desc 1 and desc 2 + - `finalize_srcs.py`: see desc 2 + - `generate_branch_list.py` : see desc 1 + - `make_rem.py` : see desc 2 + - `make_dem.py` : see desc 2 + - `outputs_cleanup.py`: see desc 1 + - `process_branch.sh`: see desc 1 + - `query_vectors_by_branch_polygons.py`: see desc 2 + - `reset_mannings.py` : see desc 2 + - `run_by_branch.sh`: see desc 1 + - `run_unit_wb.sh`: see desc 1 + - `stream_branches.py`: see desc 2 + - `subset_catch_list_by_branch_id.py`: see desc 2 + - `toDo.md`: see desc 2 + - `usgs_gage_aggregate.py`: see desc 1 + - `usgs_gage_unit_setup.py` : see desc 1 + - `utils` + - `fim_enums.py` : see desc 1 + +- `tools` + - `combine_crosswalk_tables.py` : see desc 2 + - `compare_ms_and_non_ms_metrics.py` : see desc 2 + - `compile_comp_stats.py`: see desc 2 and added note about possible deprecation. + - `compile_computation_stats.py` : see desc 2 and added note about possible deprecation. + - `composite_inundation.py` : see desc 1 : note.. references a file called inundate_gms which retains it's name for now. + - `consolidate_metrics.py`: added note about possible deprecation. + - `copy_test_case_folders.py`: see desc 1 + - `eval_plots.py` : see desc 1 + - `evaluate_continuity.py`: see desc 2 + - `find_max_catchment_breadth.py` : see desc 2 + - `generate_categorical_fim_mapping.py` : see desc 1 + - `inundate_gms.py`: see desc 1 and desc 2. Note: This file has retained its name with the phrase "gms" in it as it might be upgraded later and there are some similar files with similar names. 
+ - `inundate_nation.py` : see desc 1 + - `inundation.py`: text styling change + - `make_boxes_from_bounds.py`: text styling change + - `mosaic_inundation.py`: see desc 1 and desc 2 + - `overlapping_inundation.py`: see desc 2 + - `plots.py` : see desc 2 + - `run_test_case.py`: see desc 1 + - `synthesize_test_cases.py`: see desc 1 + +- `unit_tests` + - `README.md`: see desc 1 + - `__template_unittests.py`: see desc 1 + - `check_unit_errors_params.json` and `check_unit_errors_unittests.py` : see desc 1 + - `derive_level_paths_params.json` and `derive_level_paths_unittests.py` : see desc 1 and desc 2 + - `filter_catchments_and_add_attributes_unittests.py`: see desc 1 + - `outputs_cleanup_params.json` and `outputs_cleanup_unittests.py`: see desc 1 and desc 2 + - `split_flows_unittests.py` : see desc 1 + - `tools` + - `inundate_gms_params.json` and `inundate_gms_unittests.py`: see desc 1 and desc 2 + +

+ ## v4.1.0.0 - 2023-01-30 - [PR#806](https://github.com/NOAA-OWP/inundation-mapping/pull/806) As we move to Amazon Web Service, AWS, we need to change our processing system. Currently, it is `gms_pipeline.sh` using bash "parallel" as an iterator which then first processes all HUCs, but not their branches. One of `gms_pipeline.sh`'s next steps is to do branch processing which is again iterated via "parallel". AKA. Units processed as one step, branches processed as second independent step. From 81841c120897290637a54a12579033c35b6895e4 Mon Sep 17 00:00:00 2001 From: Rob Hanna Date: Fri, 17 Feb 2023 20:36:22 +0000 Subject: [PATCH 06/11] Update to restore a few files to prev editions --- src/add_crosswalk.py | 163 ++++++++++++++++++--------------- tools/mosaic_inundation.py | 2 +- tools/run_test_case.py | 106 +++++++++++---------- tools/synthesize_test_cases.py | 109 ++++++++++++++++++---- 4 files changed, 233 insertions(+), 147 deletions(-) diff --git a/src/add_crosswalk.py b/src/add_crosswalk.py index a690dfce4..15dfe2efc 100755 --- a/src/add_crosswalk.py +++ b/src/add_crosswalk.py @@ -15,6 +15,8 @@ from utils.shared_variables import FIM_ID from memory_profiler import profile +# Feb 17, 2023 +# We want to explore using FR methodology as branch zero @mem_profile def add_crosswalk(input_catchments_fileName, @@ -31,6 +33,7 @@ def add_crosswalk(input_catchments_fileName, input_nwmcatras_fileName, mannings_n, input_nwmcat_fileName, + extent, small_segments_filename, calibration_mode=False): @@ -41,63 +44,95 @@ def add_crosswalk(input_catchments_fileName, min_catchment_area = float(os.environ['min_catchment_area']) #0.25# min_stream_length = float(os.environ['min_stream_length']) #0.5# - ## crosswalk using stream segment midpoint method - input_nwmcat = gpd.read_file(input_nwmcat_fileName, mask=input_huc) + if extent == 'FR': + ## crosswalk using majority catchment method - input_nwmcat = input_nwmcat.rename(columns={'ID':'feature_id'}) - if input_nwmcat.feature_id.dtype != 'int': input_nwmcat.feature_id = input_nwmcat.feature_id.astype(int) - input_nwmcat=input_nwmcat.set_index('feature_id') + # calculate majority catchments + majority_calc = zonal_stats(input_catchments, input_nwmcatras_fileName, stats=['majority'], geojson_out=True) + input_majorities = gpd.GeoDataFrame.from_features(majority_calc) + input_majorities = input_majorities.rename(columns={'majority' : 'feature_id'}) - input_nwmflows = input_nwmflows.rename(columns={'ID':'feature_id'}) - if input_nwmflows.feature_id.dtype != 'int': input_nwmflows.feature_id = input_nwmflows.feature_id.astype(int) + input_majorities = input_majorities[:][input_majorities['feature_id'].notna()] + if input_majorities.feature_id.dtype != 'int': input_majorities.feature_id = input_majorities.feature_id.astype(int) + if input_majorities.HydroID.dtype != 'int': input_majorities.HydroID = input_majorities.HydroID.astype(int) - # Get stream midpoint - stream_midpoint = [] - hydroID = [] - for i,lineString in enumerate(input_flows.geometry): - hydroID = hydroID + [input_flows.loc[i,'HydroID']] - stream_midpoint = stream_midpoint + [lineString.interpolate(0.5,normalized=True)] + input_nwmflows = input_nwmflows.rename(columns={'ID':'feature_id'}) + if input_nwmflows.feature_id.dtype != 'int': input_nwmflows.feature_id = input_nwmflows.feature_id.astype(int) + relevant_input_nwmflows = input_nwmflows[input_nwmflows['feature_id'].isin(input_majorities['feature_id'])] + relevant_input_nwmflows = relevant_input_nwmflows.filter(items=['feature_id','order_']) - 
input_flows_midpoint = gpd.GeoDataFrame({'HydroID':hydroID, 'geometry':stream_midpoint}, crs=input_flows.crs, geometry='geometry') - input_flows_midpoint = input_flows_midpoint.set_index('HydroID') + if input_catchments.HydroID.dtype != 'int': input_catchments.HydroID = input_catchments.HydroID.astype(int) + output_catchments = input_catchments.merge(input_majorities[['HydroID','feature_id']],on='HydroID') + output_catchments = output_catchments.merge(relevant_input_nwmflows[['order_','feature_id']],on='feature_id') - # Create crosswalk - crosswalk = gpd.sjoin(input_flows_midpoint, input_nwmcat, how='left', op='within').reset_index() - crosswalk = crosswalk.rename(columns={"index_right": "feature_id"}) + if input_flows.HydroID.dtype != 'int': input_flows.HydroID = input_flows.HydroID.astype(int) + output_flows = input_flows.merge(input_majorities[['HydroID','feature_id']],on='HydroID') + if output_flows.HydroID.dtype != 'int': output_flows.HydroID = output_flows.HydroID.astype(int) + output_flows = output_flows.merge(relevant_input_nwmflows[['order_','feature_id']],on='feature_id') + output_flows = output_flows.merge(output_catchments.filter(items=['HydroID','areasqkm']),on='HydroID') - # fill in missing ms - crosswalk_missing = crosswalk.loc[crosswalk.feature_id.isna()] - for index, stream in crosswalk_missing.iterrows(): + elif (extent == 'MS') | (extent == 'GMS'): + ## crosswalk using stream segment midpoint method + input_nwmcat = gpd.read_file(input_nwmcat_fileName, mask=input_huc) - # find closest nwm catchment by distance - distances = [stream.geometry.distance(poly) for poly in input_nwmcat.geometry] - min_dist = min(distances) - nwmcat_index=distances.index(min_dist) + # only reduce nwm catchments to mainstems if running mainstems + if extent == 'MS': + input_nwmcat = input_nwmcat.loc[input_nwmcat.mainstem==1] - # update crosswalk - crosswalk.loc[crosswalk.HydroID==stream.HydroID,'feature_id'] = input_nwmcat.iloc[nwmcat_index].name - crosswalk.loc[crosswalk.HydroID==stream.HydroID,'AreaSqKM'] = input_nwmcat.iloc[nwmcat_index].AreaSqKM - crosswalk.loc[crosswalk.HydroID==stream.HydroID,'Shape_Length'] = input_nwmcat.iloc[nwmcat_index].Shape_Length - crosswalk.loc[crosswalk.HydroID==stream.HydroID,'Shape_Area'] = input_nwmcat.iloc[nwmcat_index].Shape_Area + input_nwmcat = input_nwmcat.rename(columns={'ID':'feature_id'}) + if input_nwmcat.feature_id.dtype != 'int': input_nwmcat.feature_id = input_nwmcat.feature_id.astype(int) + input_nwmcat=input_nwmcat.set_index('feature_id') - crosswalk = crosswalk.filter(items=['HydroID', 'feature_id']) - crosswalk = crosswalk.merge(input_nwmflows[['feature_id','order_']],on='feature_id') + input_nwmflows = input_nwmflows.rename(columns={'ID':'feature_id'}) + if input_nwmflows.feature_id.dtype != 'int': input_nwmflows.feature_id = input_nwmflows.feature_id.astype(int) - if len(crosswalk) < 1: - print ("No relevant streams within HUC boundaries.") - sys.exit(0) + # Get stream midpoint + stream_midpoint = [] + hydroID = [] + for i,lineString in enumerate(input_flows.geometry): + hydroID = hydroID + [input_flows.loc[i,'HydroID']] + stream_midpoint = stream_midpoint + [lineString.interpolate(0.5,normalized=True)] - if input_catchments.HydroID.dtype != 'int': input_catchments.HydroID = input_catchments.HydroID.astype(int) - output_catchments = input_catchments.merge(crosswalk,on='HydroID') + input_flows_midpoint = gpd.GeoDataFrame({'HydroID':hydroID, 'geometry':stream_midpoint}, crs=input_flows.crs, geometry='geometry') + input_flows_midpoint = 
input_flows_midpoint.set_index('HydroID') - if input_flows.HydroID.dtype != 'int': input_flows.HydroID = input_flows.HydroID.astype(int) - output_flows = input_flows.merge(crosswalk,on='HydroID') + # Create crosswalk + crosswalk = gpd.sjoin(input_flows_midpoint, input_nwmcat, how='left', op='within').reset_index() + crosswalk = crosswalk.rename(columns={"index_right": "feature_id"}) - # Consider adding filter_catchments_and_add_attributes.py to run_by_branch.sh - if 'areasqkm' not in output_catchments.columns: - output_catchments['areasqkm'] = output_catchments.geometry.area/(1000**2) + # fill in missing ms + crosswalk_missing = crosswalk.loc[crosswalk.feature_id.isna()] + for index, stream in crosswalk_missing.iterrows(): - output_flows = output_flows.merge(output_catchments.filter(items=['HydroID','areasqkm']),on='HydroID') + # find closest nwm catchment by distance + distances = [stream.geometry.distance(poly) for poly in input_nwmcat.geometry] + min_dist = min(distances) + nwmcat_index=distances.index(min_dist) + + # update crosswalk + crosswalk.loc[crosswalk.HydroID==stream.HydroID,'feature_id'] = input_nwmcat.iloc[nwmcat_index].name + crosswalk.loc[crosswalk.HydroID==stream.HydroID,'AreaSqKM'] = input_nwmcat.iloc[nwmcat_index].AreaSqKM + crosswalk.loc[crosswalk.HydroID==stream.HydroID,'Shape_Length'] = input_nwmcat.iloc[nwmcat_index].Shape_Length + crosswalk.loc[crosswalk.HydroID==stream.HydroID,'Shape_Area'] = input_nwmcat.iloc[nwmcat_index].Shape_Area + + crosswalk = crosswalk.filter(items=['HydroID', 'feature_id']) + crosswalk = crosswalk.merge(input_nwmflows[['feature_id','order_']],on='feature_id') + + if len(crosswalk) < 1: + print ("No relevant streams within HUC boundaries.") + sys.exit(0) + + if input_catchments.HydroID.dtype != 'int': input_catchments.HydroID = input_catchments.HydroID.astype(int) + output_catchments = input_catchments.merge(crosswalk,on='HydroID') + + if input_flows.HydroID.dtype != 'int': input_flows.HydroID = input_flows.HydroID.astype(int) + output_flows = input_flows.merge(crosswalk,on='HydroID') + + # added for GMS. 
Consider adding filter_catchments_and_add_attributes.py to run_by_branch.sh + if 'areasqkm' not in output_catchments.columns: + output_catchments['areasqkm'] = output_catchments.geometry.area/(1000**2) + + output_flows = output_flows.merge(output_catchments.filter(items=['HydroID','areasqkm']),on='HydroID') output_flows['ManningN'] = mannings_n @@ -195,14 +230,22 @@ def add_crosswalk(input_catchments_fileName, for src_index, src_stage in new_values.iterrows(): output_src.loc[(output_src['HydroID']== short_id) & (output_src['Stage']== src_stage[0]),['Discharge (m3s-1)']] = src_stage[1] - output_src = output_src.merge(crosswalk[['HydroID','feature_id']],on='HydroID') + if extent == 'FR': + output_src = output_src.merge(input_majorities[['HydroID','feature_id']],on='HydroID') + elif (extent == 'MS') | (extent == 'GMS'): + output_src = output_src.merge(crosswalk[['HydroID','feature_id']],on='HydroID') output_crosswalk = output_src[['HydroID','feature_id']] output_crosswalk = output_crosswalk.drop_duplicates(ignore_index=True) + ## bathy estimation integration in synthetic rating curve calculations + #if (bathy_src_calc == True and extent == 'MS'): + # output_src = bathy_rc_lookup(output_src,input_bathy_fileName,output_bathy_fileName,output_bathy_streamorder_fileName,output_bathy_thalweg_fileName,output_bathy_xs_lookup_fileName) + #else: + # print('Note: NOT using bathy estimation approach to modify the SRC...') + # make hydroTable output_hydro_table = output_src.loc[:,['HydroID','feature_id','NextDownID','order_','Number of Cells','SurfaceArea (m2)','BedArea (m2)','TopWidth (m)','LENGTHKM','AREASQKM','WettedPerimeter (m)','HydraulicRadius (m)','WetArea (m2)','Volume (m3)','SLOPE','ManningN','Stage','Discharge (m3s-1)']] - output_hydro_table.rename(columns={'Stage' : 'stage','Discharge (m3s-1)':'discharge_cms'},inplace=True) ## Set placeholder variables to be replaced in post-processing (as needed). Create here to ensure consistent column vars ## These variables represent the original unmodified values @@ -273,7 +316,7 @@ def add_crosswalk(input_catchments_fileName, if __name__ == '__main__': - parser = argparse.ArgumentParser(description='Crosswalk for networks; calculate synthetic rating curves; update short rating curves') + parser = argparse.ArgumentParser(description='Crosswalk for MS/FR/GMS networks; calculate synthetic rating curves; update short rating curves') parser.add_argument('-d','--input-catchments-fileName', help='DEM derived catchments', required=True) parser.add_argument('-a','--input-flows-fileName', help='DEM derived streams', required=True) parser.add_argument('-s','--input-srcbase-fileName', help='Base synthetic rating curve table', required=True) @@ -288,35 +331,11 @@ def add_crosswalk(input_catchments_fileName, parser.add_argument('-y','--input-nwmcatras-fileName',help='NWM catchment raster',required=False) parser.add_argument('-m','--mannings-n',help='Mannings n. Accepts single parameter set or list of parameter set in calibration mode. 
Currently input as csv.',required=True) parser.add_argument('-z','--input-nwmcat-fileName',help='NWM catchment polygon',required=True) + parser.add_argument('-p','--extent',help='GMS only for now', default='GMS', required=False) parser.add_argument('-k','--small-segments-filename',help='output list of short segments',required=True) parser.add_argument('-c','--calibration-mode',help='Mannings calibration flag',required=False,action='store_true') args = vars(parser.parse_args()) add_crosswalk(**args) - - ''' - input_catchments_fileName = args['input_catchments_fileName'] - input_flows_fileName = args['input_flows_fileName'] - input_srcbase_fileName = args['input_srcbase_fileName'] - output_catchments_fileName = args['output_catchments_fileName'] - output_flows_fileName = args['output_flows_fileName'] - output_src_fileName = args['output_src_fileName'] - output_src_json_fileName = args['output_src_json_fileName'] - output_crosswalk_fileName = args['output_crosswalk_fileName'] - output_hydro_table_fileName = args['output_hydro_table_fileName'] - input_huc_fileName = args['input_huc_fileName'] - input_nwmflows_fileName = args['input_nwmflows_fileName'] - input_nwmcatras_fileName = args['input_nwmcatras_fileName'] - mannings_n = args['mannings_n'] - input_nwmcat_fileName = args['input_nwmcat_fileName'] - small_segments_filename = args['small_segments_filename'] - calibration_mode = args['calibration_mode'] - - add_crosswalk(input_catchments_fileName, - input_flows_fileName, - input_srcbase_fileName, - output_catchments_fileName, - output_flows_fileName, - output_src_fileName,output_src_json_fileName,output_crosswalk_fileName,output_hydro_table_fileName,input_huc_fileName,input_nwmflows_fileName,input_nwmcatras_fileName,mannings_n,input_nwmcat_fileName,small_segments_filename,calibration_mode) - ''' \ No newline at end of file + diff --git a/tools/mosaic_inundation.py b/tools/mosaic_inundation.py index 9a267a8fa..215e6511c 100755 --- a/tools/mosaic_inundation.py +++ b/tools/mosaic_inundation.py @@ -25,7 +25,7 @@ def Mosaic_inundation( map_file, is_mosaic_for_branches = False ): # Notes: - # - If is_mosaic_for_gms_branches is true, the mosaic output name + # - If is_mosaic_for_branches is true, the mosaic output name # will add the HUC into the output name for overwrite resons. # check input diff --git a/tools/run_test_case.py b/tools/run_test_case.py index c1e8c58a2..3920f540d 100755 --- a/tools/run_test_case.py +++ b/tools/run_test_case.py @@ -4,7 +4,7 @@ import pandas as pd from tools_shared_variables import TEST_CASES_DIR, INPUTS_DIR, PREVIOUS_FIM_DIR, OUTPUTS_DIR, AHPS_BENCHMARK_CATEGORIES, MAGNITUDE_DICT, elev_raster_ndv -# from inundation import inundate +from inundation import inundate from mosaic_inundation import Mosaic_inundation from inundate_gms import Inundate_gms from tools_shared_functions import compute_contingency_stats_from_rasters @@ -144,14 +144,16 @@ def list_all_test_cases(cls, version, archive, benchmark_categories=[]): return test_case_list - def alpha_test(self, calibrated=False, mask_type='huc', inclusion_area='', - inclusion_area_buffer=0, overwrite=True, verbose=False, num_workers_inundate=1): + def alpha_test(self, calibrated=False, model='', mask_type='huc', inclusion_area='', + inclusion_area_buffer=0, overwrite=True, verbose=False, gms_workers=1): '''Compares a FIM directory with benchmark data from a variety of sources. Parameters ---------- calibrated : bool Whether or not this FIM version is calibrated. + model : str + MS or FR extent of the model. 
This value will be written to the eval_metadata.json. mask_type : str Mask type to feed into inundation.py. inclusion_area : int @@ -162,7 +164,7 @@ def alpha_test(self, calibrated=False, mask_type='huc', inclusion_area='', If True, overwites pre-existing test cases within the test_cases directory. verbose : bool If True, prints out all pertinent data. - num_workers_branches : int + gms_workers : int Number of worker processes assigned to GMS processing. ''' @@ -176,7 +178,6 @@ def alpha_test(self, calibrated=False, mask_type='huc', inclusion_area='', self.stats_modes_list = ['total_area'] # Create paths to fim_run outputs for use in inundate() - ''' if model != 'GMS': self.rem = os.path.join(self.fim_dir, 'rem_zeroed_masked.tif') if not os.path.exists(self.rem): @@ -190,16 +191,15 @@ def alpha_test(self, calibrated=False, mask_type='huc', inclusion_area='', else: self.catchment_poly = os.path.join(self.fim_dir, 'gw_catchments_reaches_filtered_addedAttributes_crosswalked.gpkg') self.hydro_table = os.path.join(self.fim_dir, 'hydroTable.csv') - ''' - + # Map necessary inputs for inundate(). self.hucs, self.hucs_layerName = os.path.join(INPUTS_DIR, 'wbd', 'WBD_National.gpkg'), 'WBDHU8' if inclusion_area != '': inclusion_area_name = os.path.split(inclusion_area)[1].split('.')[0] # Get layer name self.mask_dict.update({inclusion_area_name: {'path': inclusion_area, - 'buffer': int(inclusion_area_buffer), - 'operation': 'include'}}) + 'buffer': int(inclusion_area_buffer), + 'operation': 'include'}}) # Append the concatenated inclusion_area_name and buffer. if inclusion_area_buffer == None: inclusion_area_buffer = 0 @@ -215,14 +215,14 @@ def alpha_test(self, calibrated=False, mask_type='huc', inclusion_area='', for magnitude in validation_data: for instance in validation_data[magnitude]: # instance will be the lid for AHPS sites and '' for other sites # For each site, inundate the REM and compute aggreement raster with stats - self._inundate_and_compute(magnitude, instance, verbose=verbose, num_workers_inundate=num_workers_inundate) + self._inundate_and_compute(magnitude, instance, model=model, verbose=verbose, gms_workers=gms_workers) # Clean up 'total_area' outputs from AHPS sites if self.is_ahps: self.clean_ahps_outputs(os.path.join(self.dir, magnitude)) # Write out evaluation meta-data - self.write_metadata(calibrated) + self.write_metadata(calibrated, model) except KeyboardInterrupt: print("Program aborted via keyboard interrupt") @@ -235,10 +235,12 @@ def _inundate_and_compute(self, magnitude, lid, compute_only = False, + model = '', verbose = False, - num_workers_inundate = 1): + gms_workers = 1): '''Method for inundating and computing contingency rasters as part of the alpha_test. - + Used by both the alpha_test() and composite() methods. 
+ Parameters ---------- magnitude : str @@ -281,31 +283,30 @@ def _inundate_and_compute(self, # Inundate REM if not compute_only: # composite alpha tests don't need to be inundated - #if model == 'GMS': - fh.vprint("Begin FIM Inundation", verbose) - map_file = Inundate_gms( hydrofabric_dir = os.path.dirname(self.fim_dir), - forecast = benchmark_flows, - num_workers = num_workers_inundate, - hucs = self.huc, - inundation_raster = predicted_raster_path, - inundation_polygon = None, - depths_raster = None, - verbose = verbose, - log_file = None, - output_fileNames = None ) - #if (len(map_file) > 0): - fh.vprint("Begin FIM Mosaic", verbose) - Mosaic_inundation( map_file, - mosaic_attribute = 'inundation_rasters', - mosaic_output = predicted_raster_path, - mask = os.path.join(self.fim_dir,'wbd.gpkg'), - unit_attribute_name = 'huc8', - nodata = elev_raster_ndv, - workers = 1, - remove_inputs = True, - subset = None, - verbose = verbose ) - ''' + if model == 'GMS': + fh.vprint("Begin FIM4 Inundation", verbose) + map_file = Inundate_gms( hydrofabric_dir = os.path.dirname(self.fim_dir), + forecast = benchmark_flows, + num_workers = gms_workers, + hucs = self.huc, + inundation_raster = predicted_raster_path, + inundation_polygon = None, + depths_raster = None, + verbose = verbose, + log_file = None, + output_fileNames = None ) + #if (len(map_file) > 0): + fh.vprint("Begin FIM4 Mosaic", verbose) + Mosaic_inundation( map_file, + mosaic_attribute = 'inundation_rasters', + mosaic_output = predicted_raster_path, + mask = os.path.join(self.fim_dir,'wbd.gpkg'), + unit_attribute_name = 'huc8', + nodata = elev_raster_ndv, + workers = 1, + remove_inputs = True, + subset = None, + verbose = verbose ) # FIM v3 and before else: fh.vprint("Begin FIM v3 (or earlier) Inundation", verbose) @@ -317,8 +318,7 @@ def _inundate_and_compute(self, quiet=True) if inundate_result != 0: return inundate_result - ''' - + # Create contingency rasters and stats fh.vprint("Begin creating contingency rasters and stats", verbose) if os.path.isfile(predicted_raster_path): @@ -335,17 +335,15 @@ def _inundate_and_compute(self, @classmethod - def run_alpha_test(cls, version, test_id, magnitude, calibrated, archive_results=False, - mask_type='huc', inclusion_area='', inclusion_area_buffer=0, light_run=False, - overwrite=True, verbose=False, num_workers_inundate=1): - + def run_alpha_test(cls, version, test_id, magnitude, calibrated, model, archive_results=False, + mask_type='huc', inclusion_area='', inclusion_area_buffer=0, light_run=False, overwrite=True, verbose=False, gms_workers=1): '''Class method for instantiating the test_case class and running alpha_test directly''' alpha_class = cls(test_id, version, archive_results) - alpha_class.alpha_test(calibrated, mask_type, inclusion_area, - inclusion_area_buffer, overwrite, verbose, num_workers_inundate) + alpha_class.alpha_test(calibrated, model, mask_type, inclusion_area, + inclusion_area_buffer, overwrite, verbose, gms_workers) - #def composite(self, version_2, calibrated = False, overwrite = True, verbose = False): + def composite(self, version_2, calibrated = False, overwrite = True, verbose = False): '''Class method for compositing MS and FR inundation and creating an agreement raster with stats Parameters @@ -357,7 +355,7 @@ def run_alpha_test(cls, version, test_id, magnitude, calibrated, archive_results overwrite : bool If True, overwites pre-existing test cases within the test_cases directory. 
''' - ''' + if re.match(r'(.*)(_ms|_fr)', self.version): composite_version_name = re.sub(r'(.*)(_ms|_fr)', r'\1_comp', self.version, count=1) else: @@ -414,17 +412,15 @@ def run_alpha_test(cls, version, test_id, magnitude, calibrated, archive_results composite_test_case.clean_ahps_outputs(os.path.join(composite_test_case.dir, magnitude)) composite_test_case.write_metadata(calibrated, 'COMP') - ''' - def write_metadata(self, calibrated ): - + def write_metadata(self, calibrated, model): '''Writes metadata files for a test_case directory.''' with open(os.path.join(self.dir,'eval_metadata.json'),'w') as meta: - eval_meta = { 'calibrated' : calibrated , 'model' : 'FIM4' } - meta.write( json.dumps(eval_meta,indent=2) ) - + eval_meta = { 'calibrated' : calibrated , 'model' : model } + meta.write( + json.dumps(eval_meta,indent=2) + ) def clean_ahps_outputs(self, magnitude_directory): - '''Cleans up `total_area` files from an input AHPS magnitude directory.''' output_file_list = [os.path.join(magnitude_directory, of) for of in os.listdir(magnitude_directory)] for output_file in output_file_list: diff --git a/tools/synthesize_test_cases.py b/tools/synthesize_test_cases.py index bc133e8fc..21c5d47d4 100755 --- a/tools/synthesize_test_cases.py +++ b/tools/synthesize_test_cases.py @@ -108,7 +108,6 @@ def create_master_metrics_csv(master_metrics_csv_output, dev_versions_to_include for magnitude in magnitude_list: for version in versions_to_aggregate: - # this may be pulling in older fim3 versions (so keep the _ms, _fr and _c) if '_ms' in version: extent_config = 'MS' elif ('_fr' in version) or (version == 'fim_2_3_3'): @@ -167,7 +166,6 @@ def create_master_metrics_csv(master_metrics_csv_output, dev_versions_to_include for magnitude in ['action', 'minor', 'moderate', 'major']: for version in versions_to_aggregate: - # this may be pulling in older fim3 versions (so keep the _ms, _fr and _c) if '_ms' in version: extent_config = 'MS' elif ('_fr' in version) or (version == 'fim_2_3_3'): @@ -234,7 +232,8 @@ def progress_bar_handler(executor_dict, verbose, desc): # Sample usage: ''' - python /foss_fim/tools/synthesize_test_cases.py -c DEV -v some_runtime_folder_name -jh 3 -jb 25 -m /outputs/some_runtime_folder_name/some_runtime_folder_name_metrics.csv -o + === FOR (FIM 4) + python /foss_fim/tools/synthesize_test_cases.py -c DEV -e GMS -v gms_test_synth_combined -jh 2 -jb 40 -m /outputs/gms_test_synth_combined/gms_synth_metrics.csv -vg -o Notes: - fim_input.csv MUST be in the folder suggested. @@ -243,31 +242,47 @@ def progress_bar_handler(executor_dict, verbose, desc): - the -vg param may not be working (will be assessed better on later releases). - Find a balance between -jh (number of jobs for hucs) versus -jb (number of jobs for branches) on quick tests on a 96 core machine, we tried [1 @ 80], [2 @ 40], and [3 @ 25] (and others). - -jb 3 -jb 25 was noticably better. You can likely go more jb cores with better success, just + -jb 3 -jh 25 was noticably better. You can likely go more jb cores with better success, just experiment. Start times, End Times and duration are now included. - The -m can be any path and any name. To see your outputs in the test_case folder (hard coded path), you can check for outputs using - (cd .... to your test_case folder), then command becomes find . -name some_runtime_folder_name* -type d (Notice the + (cd .... to your test_case folder), then command becomes find . 
-name gms_test_* -type d (Notice the the -name can be a wildcard for your -v param (or the whole -v value)) - If you want to delete the test outputs, test the outputs as suggest immediately above, but this time your - command becomes: find . -name some_runtime_folder_name* -type d -exec rm -rdf {} + + command becomes: find . -name gms_test_* -type d -exec rm -rdf {} + + ''' + ''' + === FOR FIM 3 + python /foss_fim/tools/synthesize_test_cases.py -c DEV -e MS -v dev_fim_3_0_29_1_ms -jh 4 -m /outputs/dev_fim_3_0_29_1_ms/alpha/alpha_master_metrics_fim_3_0_29_1_ms_src_adjust.csv -vg -o + + Notes: + - the -v param is the name in the folder in the "outputs/" directory where the test hucs are at. + It also becomes the folder names inside the test_case folders when done. + - the -vg param may not be working (will be assessed better on later releases). + - The -m can be any path and any name. + + To see your outputs in the test_case folder (hard coded path), you can check for outputs using + (cd .... to your test_case folder), then command becomes find . -name dev_fim_3_0_29_1_* -type d (Notice the + the -name can be a wildcard for your -v param (or the whole -v value)) + If you want to delete the test outputs, test the outputs as suggest immediately above, but this time your + command becomes: find . -name dev_fim_3_0_29_1_* -type d -exec rm -rdf {} + ''' # Parse arguments. parser = argparse.ArgumentParser(description='Caches metrics from previous versions of HAND.') parser.add_argument('-c','--config',help='Save outputs to development_versions or previous_versions? Options: "DEV" or "PREV"',required=False,default='DEV') parser.add_argument('-l','--calibrated',help='Denotes use of calibrated n values. This should be taken from meta-data from hydrofabric dir',required=False, default=False,action='store_true') + parser.add_argument('-e','--model',help='Denotes model used. FR, MS, or GMS allowed. This should be taken from meta-data in hydrofabric dir.', default='GMS', required=False) parser.add_argument('-v','--fim-version',help='Name of fim version to cache.',required=False, default="all") parser.add_argument('-jh','--job-number-huc',help='Number of processes to use for HUC scale operations. HUC and Batch job numbers should multiply to no more than one less than the CPU count of the machine.',required=False, default=1,type=int) - #parser.add_argument('-jb','--job-number-branch',help='Number of processes to use for Branch scale operations. HUC and Batch job numbers should multiply to no more than one less than the CPU count of the machine.',required=False, default=1,type=int) - parser.add_argument('-jb','--job-number-inundate',help='Number of processes to use for inundating. HUC and inundate job numbers should multiply to no more than one less than the CPU count of the machine.',required=False, default=1,type=int) + parser.add_argument('-jb','--job-number-branch',help='Number of processes to use for Branch scale operations. HUC and Batch job numbers should multiply to no more than one less than the CPU count of the machine.',required=False, default=1,type=int) parser.add_argument('-s','--special-string',help='Add a special name to the end of the branch.',required=False, default="") parser.add_argument('-b','--benchmark-category',help='A benchmark category to specify. 
Defaults to process all categories.',required=False, default="all") parser.add_argument('-o','--overwrite',help='Overwrite all metrics or only fill in missing metrics.',required=False, action="store_true") parser.add_argument('-dc', '--dev-version-to-compare', nargs='+', help='Specify the name(s) of a dev (testing) version to include in master metrics CSV. Pass a space-delimited list.',required=False) parser.add_argument('-m','--master-metrics-csv',help='Define path for master metrics CSV file.',required=False,default=None) + parser.add_argument('-d','--fr-run-dir',help='Name of test case directory containing FIM for FR model',required=False,default=None) parser.add_argument('-vr','--verbose',help='Verbose',required=False,default=None,action='store_true') parser.add_argument('-vg','--gms-verbose',help='GMS Verbose Progress Bar',required=False,default=None,action='store_true') @@ -276,13 +291,15 @@ def progress_bar_handler(executor_dict, verbose, desc): config = args['config'] fim_version = args['fim_version'] job_number_huc = args['job_number_huc'] - job_number_inundate = args['job_number_inundate'] + job_number_branch = args['job_number_branch'] special_string = args['special_string'] benchmark_category = args['benchmark_category'] overwrite = args['overwrite'] dev_versions_to_compare = args['dev_version_to_compare'] master_metrics_csv = args['master_metrics_csv'] + fr_run_dir = args['fr_run_dir'] calibrated = args['calibrated'] + model = args['model'] verbose = bool(args['verbose']) gms_verbose = bool(args['gms_verbose']) @@ -294,13 +311,13 @@ def progress_bar_handler(executor_dict, verbose, desc): print() # check job numbers - total_cpus_requested = job_number_huc * job_number_inundate - total_cpus_available = os.cpu_count() - 2 + total_cpus_requested = job_number_huc * job_number_branch + total_cpus_available = os.cpu_count() - 1 if total_cpus_requested > total_cpus_available: - raise ValueError('The HUC job number, {}, multiplied by the inundate job number, {}, '\ - 'exceeds your machine\'s available CPU count minus two. '\ - 'Please lower the -j (job_number_huc) or job_number_inundate'\ - 'values accordingly.'.format(job_number_huc, job_number_inundate) + raise ValueError('The HUC job number, {}, multiplied by the branch job number, {}, '\ + 'exceeds your machine\'s available CPU count minus one. '\ + 'Please lower the job_number_huc or job_number_branch'\ + 'values accordingly.'.format(job_number_huc,job_number_branch) ) # Default to processing all possible versions in PREVIOUS_FIM_DIR. Otherwise, process only the user-supplied version. 
@@ -338,10 +355,11 @@ def progress_bar_handler(executor_dict, verbose, desc): alpha_test_args = { 'calibrated': calibrated, + 'model': model, 'mask_type': 'huc', 'overwrite': overwrite, - 'verbose':gms_verbose, - 'num_workers_inundate': job_number_inundate + 'verbose':gms_verbose if model == 'GMS' else verbose, + 'gms_workers': job_number_branch } try: @@ -353,9 +371,62 @@ def progress_bar_handler(executor_dict, verbose, desc): sys.exit(1) # Send the executor to the progress bar and wait for all MS tasks to finish - progress_bar_handler(executor_dict, True, f"Running alpha test cases with {job_number_huc} workers") + progress_bar_handler(executor_dict, True, f"Running {model} alpha test cases with {job_number_huc} workers") #wait(executor_dict.keys()) + ## Composite alpha test run is initiated by a MS `model` and providing a `fr_run_dir` + if model == 'MS' and fr_run_dir: + + ## Rebuild all test cases list with the FR version, loop through them and apply the alpha test + all_test_cases = test_case.list_all_test_cases(version = fr_run_dir, archive = archive_results, + benchmark_categories=[] if benchmark_category == "all" else [benchmark_category]) + + with ProcessPoolExecutor(max_workers=job_number_huc) as executor: + executor_dict = {} + for test_case_class in all_test_cases: + if not os.path.exists(test_case_class.fim_dir): + continue + alpha_test_args = { + 'calibrated': calibrated, + 'model': model, + 'mask_type': 'huc', + 'verbose':verbose, + 'overwrite': overwrite + } + try: + future = executor.submit(test_case_class.alpha_test, **alpha_test_args) + executor_dict[future] = test_case_class.test_id + except Exception as ex: + print(f"*** {ex}") + traceback.print_exc() + sys.exit(1) + + # Send the executor to the progress bar and wait for all FR tasks to finish + progress_bar_handler(executor_dict, True, f"Running FR test cases with {job_number_huc} workers") + #wait(executor_dict.keys()) + + # Loop through FR test cases, build composite arguments, and submit the composite method to the process pool + with ProcessPoolExecutor(max_workers=job_number_huc) as executor: + executor_dict = {} + for test_case_class in all_test_cases: + composite_args = { + 'version_2': fim_version, # this is the MS version name since `all_test_cases` are FR + 'calibrated': calibrated, + 'overwrite': overwrite, + 'verbose': verbose + } + + try: + future = executor.submit(test_case_class.alpha_test, **alpha_test_args) + executor_dict[future] = test_case_class.test_id + except Exception as ex: + print(f"*** {ex}") + traceback.print_exc() + sys.exit(1) + + # Send the executor to the progress bar + progress_bar_handler(executor_dict, verbose, f"Compositing test cases with {job_number_huc} workers") + if dev_versions_to_compare != None: dev_versions_to_include_list = dev_versions_to_compare + previous_fim_list else: From 2ed95b64aebde4d0c81b8954e75e04d579c0f49a Mon Sep 17 00:00:00 2001 From: Rob Hanna Date: Fri, 17 Feb 2023 20:49:34 +0000 Subject: [PATCH 07/11] temp rename some files with major merge conflicts --- unit_tests/{README.md => fim3_README.md} | 0 .../{__template_unittests.py => fim3__template_unittests.py} | 0 ...unit_errors_params.json => fim3_check_unit_errors_params.json} | 0 ...it_errors_unittests.py => fim3_check_unit_errors_unittests.py} | 0 ...evel_paths_params.json => fim3_derive_level_paths_params.json} | 0 ....py => fim3_filter_catchments_and_add_attributes_unittests.py} | 0 .../{split_flows_unittests.py => fim3_split_flows_unittests.py} | 0 .../{inundate_gms_params.json => 
fim3_inundate_gms_params.json} | 0 8 files changed, 0 insertions(+), 0 deletions(-) rename unit_tests/{README.md => fim3_README.md} (100%) rename unit_tests/{__template_unittests.py => fim3__template_unittests.py} (100%) rename unit_tests/{check_unit_errors_params.json => fim3_check_unit_errors_params.json} (100%) rename unit_tests/{check_unit_errors_unittests.py => fim3_check_unit_errors_unittests.py} (100%) rename unit_tests/{derive_level_paths_params.json => fim3_derive_level_paths_params.json} (100%) rename unit_tests/{filter_catchments_and_add_attributes_unittests.py => fim3_filter_catchments_and_add_attributes_unittests.py} (100%) rename unit_tests/{split_flows_unittests.py => fim3_split_flows_unittests.py} (100%) rename unit_tests/tools/{inundate_gms_params.json => fim3_inundate_gms_params.json} (100%) diff --git a/unit_tests/README.md b/unit_tests/fim3_README.md similarity index 100% rename from unit_tests/README.md rename to unit_tests/fim3_README.md diff --git a/unit_tests/__template_unittests.py b/unit_tests/fim3__template_unittests.py similarity index 100% rename from unit_tests/__template_unittests.py rename to unit_tests/fim3__template_unittests.py diff --git a/unit_tests/check_unit_errors_params.json b/unit_tests/fim3_check_unit_errors_params.json similarity index 100% rename from unit_tests/check_unit_errors_params.json rename to unit_tests/fim3_check_unit_errors_params.json diff --git a/unit_tests/check_unit_errors_unittests.py b/unit_tests/fim3_check_unit_errors_unittests.py similarity index 100% rename from unit_tests/check_unit_errors_unittests.py rename to unit_tests/fim3_check_unit_errors_unittests.py diff --git a/unit_tests/derive_level_paths_params.json b/unit_tests/fim3_derive_level_paths_params.json similarity index 100% rename from unit_tests/derive_level_paths_params.json rename to unit_tests/fim3_derive_level_paths_params.json diff --git a/unit_tests/filter_catchments_and_add_attributes_unittests.py b/unit_tests/fim3_filter_catchments_and_add_attributes_unittests.py similarity index 100% rename from unit_tests/filter_catchments_and_add_attributes_unittests.py rename to unit_tests/fim3_filter_catchments_and_add_attributes_unittests.py diff --git a/unit_tests/split_flows_unittests.py b/unit_tests/fim3_split_flows_unittests.py similarity index 100% rename from unit_tests/split_flows_unittests.py rename to unit_tests/fim3_split_flows_unittests.py diff --git a/unit_tests/tools/inundate_gms_params.json b/unit_tests/tools/fim3_inundate_gms_params.json similarity index 100% rename from unit_tests/tools/inundate_gms_params.json rename to unit_tests/tools/fim3_inundate_gms_params.json From 2ae3c53633bfd7db84d72ce9d504d02e3ded0659 Mon Sep 17 00:00:00 2001 From: Rob Hanna Date: Fri, 17 Feb 2023 21:01:59 +0000 Subject: [PATCH 08/11] rename file --- ...el_paths_unittests.py => fim3_derive_level_paths_unittests.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename unit_tests/{derive_level_paths_unittests.py => fim3_derive_level_paths_unittests.py} (100%) diff --git a/unit_tests/derive_level_paths_unittests.py b/unit_tests/fim3_derive_level_paths_unittests.py similarity index 100% rename from unit_tests/derive_level_paths_unittests.py rename to unit_tests/fim3_derive_level_paths_unittests.py From 29eb7a6e1cf6bca758a43e0c161887e70316149e Mon Sep 17 00:00:00 2001 From: Rob Hanna Date: Fri, 17 Feb 2023 21:04:06 +0000 Subject: [PATCH 09/11] Temp add file back manually --- unit_tests/README.MD | 144 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 144 
insertions(+) create mode 100644 unit_tests/README.MD diff --git a/unit_tests/README.MD b/unit_tests/README.MD new file mode 100644 index 000000000..9725aa8cf --- /dev/null +++ b/unit_tests/README.MD @@ -0,0 +1,144 @@
+## Inundation Mapping: Flood Inundation Mapping for U.S. National Water Model
+
+Flood inundation mapping software configured to work with the U.S. National Water Model operated and maintained by the National Oceanic and Atmospheric Administration (NOAA) National Water Center (NWC).
+
+#### For more information, see the [Inundation Mapping Wiki](https://github.com/NOAA-OWP/inundation-mapping/wiki).
+
+# This folder (`/unit_tests`) holds files for unit testing python files
+
+## Creating unit tests
+
+For each python code file that is being tested, unit tests should come in two files: a unit test file (based on the original python code file) and an accompanying json parameters file.
+
+The files should be named following FIM convention:
+
+{source py file name}_test.py -> `derive_level_paths_test.py`
+{source py file name}_params.json -> `derive_level_paths_params.json`
+
+
+## Tips to create a new json file for a new python unit test file.
+
+There are multiple ways to figure out a set of default json parameters for the new unit test file.
+
+One way is to use the incoming arg parser. Most python files include the code block of `if __name__ == '__main__':`, followed by external arg parsing (`args = vars(parser.parse_args())`).
+* Add a `print(args)` or similar, and get all the values including keys as output.
+* Copy that into an editor being used to create the json file.
+* Add a line break after every comma.
+* Find/replace all single quotes to double quotes, then clean up the left tab formatting (a small sketch further below shows a shortcut for this).
+
+
+## Running unit tests
+
+Start a docker container as you normally would for any development.
+```bash
+docker run --rm -it --name {your docker container name} -v /home/{your username}/projects/{your folder}/:/foss_fim {your docker image name}
+```
+Example:
+```bash
+docker run --rm -it --name mytest -v /home/abcd/projects/dev/inundation-mapping/:/foss_fim -v /abcd_share/foss_fim/outputs/:/outputs -v /abcd_share/foss_fim/:/data fim_4:dev_20220208_8eba0ee
+```
+
+For unit tests to work, you need to run the following (if not already in place).
+Notice the modified branch deny list "deny_gms_branch_unittests.lst" (special for unit tests).
+
+Here are the params and args you need if you need to re-run the unit and branch processing:
+
+```bash
+fim_pipeline.sh -n fim_unit_test_data_do_not_remove -u "02020005 02030201 05030104" -bd /foss_fim/config/deny_gms_branch_unittests.lst -ud None -j 1 -o
+```
+
+**NOTICE:** the deny file used for gms_run_branch... it's a special one for unit tests: `deny_gms_branch_unittests.lst`.
+
+If you need to run inundation tests, run the following:
+
+```bash
+python3 foss_fim/tools/synthesize_test_cases.py -c DEV -e GMS -v fim_unit_test_data_do_not_remove -jh 1 -jb 1 -m /outputs/fim_unit_test_data_do_not_remove/alpha_test_metrics.csv -o
+```
+### If you'd like to test the whole unit test suite:
+```
+pytest /foss_fim/unit_tests
+```
+
+This is not 100% stable, as accurate paths for the parameters `.json` files are not included in this repository, are not uniform across machines, and are subject to change.
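Relating to the JSON-parameter tip above, the following is a minimal, illustrative sketch (not a file in this repo; the argument names are made up) showing how `print(json.dumps(args, indent=4))` can emit ready-to-paste, double-quoted JSON from an arg parser, which skips the manual quote find/replace and re-tabbing steps:

```python
import argparse
import json

if __name__ == '__main__':
    # Hypothetical arguments purely for illustration -- use your script's real arg parser.
    parser = argparse.ArgumentParser(description='Dump argparse values for a new *_params.json file')
    parser.add_argument('-d', '--dem', help='path to a DEM raster', required=True)
    parser.add_argument('-j', '--job-number', help='number of jobs', type=int, default=1)

    args = vars(parser.parse_args())

    # json.dumps writes double quotes and indentation for you, so the output can be
    # pasted directly into the new params file as a starting "valid_data" node.
    print(json.dumps(args, indent=4))
```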
+
+### If you want to test just one unit test (from the root terminal window):
+
+```bash
+pytest /foss_fim/unit_tests/gms/derive_level_paths_test.py
+ or
+pytest /foss_fim/unit_tests/clip_vectors_to_wbd_test.py
+```
+
+### If you'd like to run a particular test, you can, for example:
+```
+pytest -v -s -k test_append_id_to_file_name_single_identifier_success
+```
+
+If one test case is chosen, pytest will scan all of the test files for the method (test case) specified.
+
+## Key Notes for creating new unit tests
+1) All test functions must start with the phrase `test_`. That is how pytest picks them up. The rest of the function name does not have to match the pattern of `function_name_being_tested` but should. Further, the rest of the function name should say what the test is about, ie) `_failed_input_path`. ie) `test_{some_function_name_from_the_source_code_file}_failed_input_path`. It is fine that the function names get very long (common in the industry).
+
+2) If you are using this for development purposes, use caution when checking unit test files and json files back in. If you check them in, they still have to work for others and not just for a dev test you are doing.
+
+3) As of now, you cannot control the order that unit tests are run within a unit test file.
+
+4) There must be at least one associated `{original py file name}_params.json` file per unit test.
+
+5) There must be at least one "happy path (successful)" test inside the unittest file. ie) one function that is expected to fully pass. You can have multiple "happy path" tests if you want to change values that are fundamentally different, but fully expected to pass.
+
+6) Json files can have multiple nodes, so the default "happy path/success" is suggested to be called `valid_data`, if one does not already exist. Generally, the individual unit tests will call the `valid_data` node and override a local method value with invalid data. In semi-rare, but possible cases, you can add more nodes if you like, but try not to create new Json nodes for a few small field changes; generally only use a new node if there are major and numerous value changes (ie: majorly different test conditions).
+
+7) Unit test functions can and should test for all "outputs" from a source function. This includes the function's return output (if any), any global variables it might set, and even that saved output files (such as .tif files) have been created successfully. It is ok to have multiple validation checks (or asserts) in one unit test function.
+
+8) One Python file = one `{original py file name}_test.py` file.
+
+9) Sometimes you may want to run a full successful "happy path" version through `fim_pipeline.sh` (or similar), to get all of the files you need in place to do your testing. However, you will want to ensure that none of the outputs are being deleted during the test. One way to solve this is to put in an invalid value for the `-d` parameter (denylist).
+ie:
+```bash
+fim_pipeline.sh -n fim_unit_test_data_do_not_remove -u 05030104 -c /foss_fim/config/params_template.env -j 1 -d /foss_fim/config/deny_gms_unit_default.lst -o
+```
+but ours would be:
+```bash
+fim_pipeline.sh -n fim_unit_test_data_do_not_remove -u 05030104 -c /foss_fim/config/params_template.env -j 1 -d no_list -o
+```
+
+## [Pytest](https://docs.pytest.org/en/7.2.x/) particulars
+
+The `pyproject.toml` file has been added, which contains the build system requirements of Python projects.
This file is used to specify which warnings are disabled so that our unit tests pass.
+
+An `__init__.py` file has been added to both subdirectories (`/gms` & `/tools`) in order for the `pytest` command run in `/unit_tests` to pick up the tests in those directories as well.
+
+Luckily, `pytest` works well with The Python Standard Library `unittest`. This made the migration of previous unit tests using `unittest` over to `pytest` quite simple. The caveat is that our current unit tests employ elements of both libraries. A full transition to `pytest` will ideally take place at a future date.
+
+## Testing for failing conditions
+- Over time, you want to start adding functions that specifically look for fail conditions. This is a key part of unit test systems. It is not uncommon to have many dozens of test functions in one unit test file. Each "fail" type test must check for ONLY one variable value change. A "fail" test function should not fundamentally pass in an invalid huc AND an invalid file path. Those are two failing test conditions and must have two separate unit test functions.
+
+- It is possible to let a unit test have more than one failed value, but only if they are tightly related and trigger just one failure (rare though). Over time, we will see tons of these types of fail unit test functions, and they will take a while to run.
+
+- When you create a "fail" test function, you can load up the normal full "params" from the json file, but then you can override (hardcoded) the one (or rarely more than one) variable inside the function. There is a way to "catch" a failure you are expecting, ensure it is the type of failure you expected, and make that expected "failure" count as a unit test pass.
+
+An example is in `unit_tests/gms/Derive_level_paths_test.py` -> `test_Derive_level_paths_invalid_input_stream_network` (function). This example gives you the pattern implemented in Pytest.
+
+## Future Enhancements
+1) Full transition to the `pytest` library, removing classes of `unittest.TestCase` and taking full advantage of available code re-use patterns offered through `pytest`.
+
+2) Over time, it is expected that python files will be broken down to many functions inside the file. Currently, we tend to have one very large function in each python file, which makes unit testing harder and less specific. Generally, each function will result in at least one "happy path" unit test function. This might require having sample unit test outputs, such as sample .tif or small .gpkg files, in subfolders of the unit tests folder, but this remains to be seen. Note: The files `/gms/derive_level_paths_test.py` and `clip_vectors_to_wbd_test.py` are not complete as they do not yet test all output from a method.
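As described in the "Testing for failing conditions" section above, an expected failure is caught, checked for its type, and counted as a passing test. Below is a minimal sketch of that pattern using `pytest.raises`; the import path, parameter name, and exception type are assumptions for illustration only, not code taken from the repo:

```python
import json

import pytest

from derive_level_paths import Derive_level_paths  # assumed import path; adjust to the real module


def test_Derive_level_paths_invalid_input_stream_network():
    # Load the happy-path params and override exactly one value to force the failure.
    with open('derive_level_paths_params.json') as params_file:
        params = json.load(params_file)['valid_data']

    params['in_stream_network'] = 'some/non_existent_file.gpkg'  # hypothetical parameter name

    # pytest.raises makes the expected "failure" count as a passing test;
    # the exception type shown here is only an assumption about what the function raises.
    with pytest.raises(FileNotFoundError):
        Derive_level_paths(**params)
```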
+ +## Unit tests currently available +``` +pytest /foss_fim/unit_tests/gms/derive_level_paths_test.py +pytest /foss_fim/unit_tests/gms/outputs_cleanup_test.py +pytest /foss_fim/unit_tests/tools/inundate_gms_test.py +pytest /foss_fim/unit_tests/tools/inundation_test.py +pytest /foss_fim/unit_tests/check_unit_errors_test.py +pytest /foss_fim/unit_tests/clip_vectors_to_wbd_test.py +pytest /foss_fim/unit_tests/filter_catchments_and_add_attributes_test.py +pytest /foss_fim/unit_tests/rating_curve_comparison_test.py +pytest /foss_fim/unit_tests/shared_functions_test.py +pytest /foss_fim/unit_tests/split_flows_test.py +pytest /foss_fim/unit_tests/usgs_gage_crosswalk_test.py +pytest /foss_fim/unit_tests/aggregate_branch_lists_test.py +pytest /foss_fim/unit_tests/generate_branch_list_csv_test.py +pytest /foss_fim/unit_tests/generate_branch_list_test.py +``` From cc36394b284eb8a11bfd39dabc6738bd710d45c4 Mon Sep 17 00:00:00 2001 From: Rob Hanna - NOAA <90854818+RobHanna-NOAA@users.noreply.github.com> Date: Fri, 17 Feb 2023 15:07:32 -0600 Subject: [PATCH 10/11] Delete README.MD --- unit_tests/README.MD | 144 ------------------------------------------- 1 file changed, 144 deletions(-) delete mode 100644 unit_tests/README.MD diff --git a/unit_tests/README.MD b/unit_tests/README.MD deleted file mode 100644 index 9725aa8cf..000000000 --- a/unit_tests/README.MD +++ /dev/null @@ -1,144 +0,0 @@ -## Inundation Mapping: Flood Inundation Mapping for U.S. National Water Model - -Flood inundation mapping software configured to work with the U.S. National Water Model operated and maintained by the National Oceanic and Atmospheric Administration (NOAA) National Water Center (NWC). - -#### For more information, see the [Inundation Mapping Wiki](https://github.com/NOAA-OWP/inundation-mapping/wiki). - -# This folder (`/unit_tests`) holds files for unit testing python files - -## Creating unit tests - -For each python code file that is being tested, unit tests should come in two files: a unit test file (based on the original python code file) and an accompanying json paramerters file. - -The files should be named following FIM convention: - -{source py file name}_test.py -> `derive_level_paths_test.py` -{source py file name}_params.json -> `derive_level_paths_params.json` - - -## Tips to create a new json file for a new python unit test file. - -There are multiple way to figure out a set of default json parameters for the new unit test file. - -One way is to use the incoming arg parser. Most python files include the code block of ` __name__ == '__main__':`, followed by external arg parsing (`args = vars(parser.parse_args()`). -* Add a `print(args)` or similar, and get all the values including keys as output. -* Copy that into an editor being used to create the json file. -* Add a line break after every comma. -* Find/replace all single quotes to double quotes then cleanup the left tab formatting. - - -## Running unit tests - -Start a docker container as you normally would for any development. -```bash -docker run --rm -it --name -v /home//projects//:/foss_fim {your docker image name} -``` -Example: -```bash -docker run --rm -it --name mytest -v /home/abcd/projects/dev/innudation-mapping/:/foss_fim -v /abcd_share/foss_fim/outputs/:/outputs -v /abcs_share/foss_fim/:/data fim_4:dev_20220208_8eba0ee -``` - -For unit tests to work, you need to run the following (if not already in place). 
-Notice a modified branch "deny_gms_branch_unittests.lst" (special for unittests) - -Here are the params and args you need if you need to re-run unit and branch - -```bash -fim_pipeline.sh -n fim_unit_test_data_do_not_remove -u "02020005 02030201 05030104" -bd /foss_fim/config/deny_gms_branch_unittests.lst -ud None -j 1 -o -``` - -**NOTICE: the deny file used for gms_run_branch... its a special one for unittests `deny_gms_branch_unittests.lst`. - -If you need to run inundation tests, fun the following: - -```bash -python3 foss_fim/tools/synthesize_test_cases.py -c DEV -e GMS -v fim_unit_test_data_do_not_remove -jh 1 -jb 1 -m /outputs/fim_unit_test_data_do_not_remove/alpha_test_metrics.csv -o -``` -### If you'd like to test the whole unit test suite: -``` -pytest /foss_fim/unit_tests -``` - -This is not 100% stable, as accurate paths for the parameters `.json` files are not included in this repository, are not uniform accross machines, and are subject to change. - -### If you want to test just one unit test (from the root terminal window): - -```bash -pytest /foss_fim/unit_tests/gms/derive_level_paths_test.py - or -pytest /foss_fim/unit_tests/clip_vectors_to_wbd_test.py -``` - -### If you'd like to run a particular test, you can, for example: -``` -pytest -v -s -k test_append_id_to_file_name_single_identifier_success -``` - -If one test case is choosen, it will scan all of the test files, and scan for the method (test case) specified. - -## Key Notes for creating new unit tests -1) All test functions must start with the phrase `test_`. That is how pytest picks them up. The rest of the function name does not have to match the pattern of `function_name_being_tested` but should. Further, the rest of the function name should say what the test is about, ie) `_failed_input_path`. ie) `test_{some_function_name_from_the_source_code_file}_failed_input_path`. It is fine that the function names get very long (common in the industry). - -2) If you are using this for development purposes, use caution when checking back in files for unit tests files and json file. If you check it in, it still has to work and work for others and not just for a dev test you are doing. - -3) As of now, you can not control the order that unit tests are run within a unit test file. - -4) There must be at least one associated `{original py file name}_params.json` file per unit test. - -5) There must be at least one "happy path (successful)" test inside the unittest file. ie) one function that is expected to fully pass. You can have multiple "happy path" tests if you want to change values that are fundamentally different, but fully expected to pass. - -6) Json files can have multiple nodes, so the default "happy path/success" is suggested to be called `valid_data`, if one does not already exist. Generally, the individual unit tests, will call the `valid_data` node and override a local method value to a invalid data. In semi-rare, but possible cases, you can add more nodes if you like, but try not to create new Json nodes for a few small field changes, generally only use a new node if there are major and lots of value changes (ie: major different test conditions). - -7) Unit test functions can and should test for all "outputs" from a source function. This includes the functions's return output (if any), but any global variables it might set, and even that saved output files (such as .tif files) have been created and successfully. It is ok to have multiple validation checks (or asserts) in one unit test function. 
- -8) One Python file = one `{original py file name}_test.py` file. - -9) Sometimes you may want to run a full successful "happy path" version through `fim_pipeline.sh` (or similar), to get all of the files you need in place to do your testing. However, you will want to ensure that none of the outputs are being deleted during the test. One way to solve this is to put in an invalid value for the `-d` parameter (denylist). -ie: -```bash -fim_pipeline.sh -n fim_unit_test_data_do_not_remove -u 05030104 -c /foss_fim/config/params_template.env -j 1 -d /foss_fim/config/deny_gms_unit_default.lst -o -``` -but ours would be: -```bash -fim_pipeline.sh -n fim_unit_test_data_do_not_remove -u 05030104 -c /foss_fim/config/params_template.env -j 1 -d no_list -o -``` - -## [Pytest](https://docs.pytest.org/en/7.2.x/) particulars - -The `pyproject.toml` file has been added, which contains the build system requirements of Python projects. This file used to specify which warnings are disabled to pass our unit tests. - -A `__init__.py` file has been added to both subdirectories (`/gms` & `/tools`) in order for the `pytest` command run in the `/unit_tests` to pick up the tests in those directories as well. - -Luckily, `pytest` works well with The Python Standard Library `unittest`. This made the migration of previous unit tests using `unittest` over to `pytest` quite simple. The caveat is that our current unit tests employ elements of both libraries. A full transition to `pytest` will ideally take place at a future date. - -## Testing for failing conditions -- Over time, you want to start adding functions that specifically look for fail conditions. This is a key part of unit test systems. It is not uncommon to have many dozens of tests functions in one unit test file. Each "fail" type test, must check for ONLY one variable value change. A "fail" test function should not fundamentally pass in an invalid huc AND an invalid file path. Those two failing test conditions and must have two seperate unit test functions. - -- It is possible to let a unit test have more than one failed value but only if they are tightly related to trigger just one failure (RARE though). YES.. Over time, we will see TONS of these types of fail unit test functions and they will take a while to run. - -- When you create a "fail" test function, you can load up the normal full "params" from the json file, but then you can override (hardcoded) the one (or rarely more than one) variable inside the function. There is a way to "catch" a failure you are expecting, ensure it is the type of failure you expected and make that "failure" to become a true fail, ie) a unit test pass. - -An example is in `unit_tests/gms/Derive_level_paths_test.py` -> `test_Derive_level_paths_invalid_input_stream_network` (function). This example gives you the pattern implemented in Pytest. - -## Future Enhancements -1) Full transition to the `pytest` library, removing classes of `unittest.TestCase` and taking full advantage of available code re-use patterns offered through `pytest`. - -2) Over time, it is expected that python files will be broken down to many functions inside the file. Currently, we tend to have one very large function in each python file which makes unit testing harder and less specific. Generally function will result in at least one "happy path" unit test function. This might require having test unit test outputs, such as sample .tif or small .gpkg files in subfolders in the unit tests folder, but this remains to be seen. 
Note: The files `/gms/derive_level_paths_test.py` and `clip_vectors_to_wbd_test.py` are not complete as they do not yet test all output from a method. -
-## Unit tests currently available
-```
-pytest /foss_fim/unit_tests/gms/derive_level_paths_test.py
-pytest /foss_fim/unit_tests/gms/outputs_cleanup_test.py
-pytest /foss_fim/unit_tests/tools/inundate_gms_test.py
-pytest /foss_fim/unit_tests/tools/inundation_test.py
-pytest /foss_fim/unit_tests/check_unit_errors_test.py
-pytest /foss_fim/unit_tests/clip_vectors_to_wbd_test.py
-pytest /foss_fim/unit_tests/filter_catchments_and_add_attributes_test.py
-pytest /foss_fim/unit_tests/rating_curve_comparison_test.py
-pytest /foss_fim/unit_tests/shared_functions_test.py
-pytest /foss_fim/unit_tests/split_flows_test.py
-pytest /foss_fim/unit_tests/usgs_gage_crosswalk_test.py
-pytest /foss_fim/unit_tests/aggregate_branch_lists_test.py
-pytest /foss_fim/unit_tests/generate_branch_list_csv_test.py
-pytest /foss_fim/unit_tests/generate_branch_list_test.py
-``` From ae713f7b6e35ee1fb81ecc25da03040bbfb708ce Mon Sep 17 00:00:00 2001 From: Carson Pruitt <90792257+CarsonPruitt-NOAA@users.noreply.github.com> Date: Fri, 17 Feb 2023 15:22:08 -0600 Subject: [PATCH 11/11] Update CHANGELOG.md --- docs/CHANGELOG.md | 87 +++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 85 insertions(+), 2 deletions(-) diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md index 7d4cacd2a..95a20c710 100644 --- a/docs/CHANGELOG.md +++ b/docs/CHANGELOG.md @@ -2,8 +2,7 @@ All notable changes to this project will be documented in this file. We follow the [Semantic Versioning 2.0.0](http://semver.org/) format. -## v4.1.(pending) - 2023-02-16 - [PR#816](https://github.com/NOAA-OWP/inundation-mapping/pull/816) - +## v4.2.0.0 - 2023-02-16 - [PR#816](https://github.com/NOAA-OWP/inundation-mapping/pull/816) This update removes the remaining elements of FIM3 code. It also removes most references to the phrase "GMS", since GMS is now essentially the entire FIM4 model. With FIM3 gone, the concepts of "MS" and "FR" are no longer relevant in FIM4 and have been removed as well. Only a few places still carry the phrase "GMS", namely some inundation files that are being re-evaluated. Some deprecated files and subfolders have also been removed. @@ -110,6 +109,90 @@ There are a lot of duplicate explanations for some of the changes, so here is a - `tools` - `inundate_gms_params.json` and `inundate_gms_unittests.py`: see desc 1 and desc 2 +

+## v4.1.3.0 - 2023-02-13 - [PR#812](https://github.com/NOAA-OWP/inundation-mapping/pull/812)
+
+An update was required to adjust the host name when in the AWS environment.
+
+### Changes
+
+- `fim_post_processing.sh`: Added an "if isAWS" flag system based on the input command args from fim_pipeline.sh or
+
+- `tools/calibration-db`
+ - `README.md`: Minor text correction.
+
+

+
+## v4.1.2.0 - 2023-02-15 - [PR#808](https://github.com/NOAA-OWP/inundation-mapping/pull/808)
+
+Add `pytest` package and refactor existing unit tests. Update parameters for unit tests (`/unit_tests/*_params.json`) to valid paths. Add leading slash to paths in `/config/params_template.env`.
+
+### Additions
+
+- `/unit_tests`
+ - `__init__.py` - needed for `pytest` command line executable to pick up tests.
+ - `pyproject.toml` - used to specify which warnings are excluded/filtered.
+ - `/gms`
+ - `__init__.py` - needed for `pytest` command line executable to pick up tests.
+ - `/tools`
+ - `__init__.py` - needed for `pytest` command line executable to pick up tests.
+ - `inundate_gms_params.json` - file moved up into this directory
+ - `inundate_gms_test.py` - file moved up into this directory
+ - `inundation_params.json` - file moved up into this directory
+ - `inundation_test.py` - file moved up into this directory
+
+### Removals
+
+- `/unit_tests/tools/gms_tools/` directory removed, and files moved up into `/unit_tests/tools`
+
+### Changes
+
+- `Pipfile` - updated to include pytest as a dependency
+- `Pipfile.lock` - updated to include pytest as a dependency
+
+- `/config`
+ - `params_template.env` - leading slash added to paths
+
+- `/unit_tests/` - All of the `*_test.py` files were refactored to follow the `pytest` paradigm.
+ - `*_params.json` - valid paths on `fim-dev1` provided
+ - `README.md` - updated to include documentation on pytest.
+ - `unit_tests_utils.py`
+ - `__template_unittests.py` -> `__template.py` - renamed without the `_test` suffix so it is excluded from the test suite. Updated the example to the new format for pytest.
+ - `check_unit_errors_test.py`
+ - `clip_vectors_to_wbd_test.py`
+ - `filter_catchments_and_add_attributes_test.py`
+ - `rating_curve_comparison_test.py`
+ - `shared_functions_test.py`
+ - `split_flows_test.py`
+ - `usgs_gage_crosswalk_test.py`
+ - `aggregate_branch_lists_test.py`
+ - `generate_branch_list_test.py`
+ - `generate_branch_list_csv_test.py`
+ - `/gms`
+ - `derive_level_paths_test.py`
+ - `outputs_cleanup_test.py`
+ - `/tools`
+ - `inundate_unittests.py` -> `inundation_test.py`
+ - `inundate_gms_test.py`
+
+
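As a rough illustration of the `pytest` paradigm mentioned in this entry (not code copied from the repo; the import path, keyword names, and params file name are assumed), a refactored happy-path test becomes a plain `test_` function that reads the `valid_data` node of its `*_params.json` file and uses bare `assert`s instead of `unittest.TestCase` methods:

```python
import json

from shared_functions import append_id_to_file_name  # assumed import path for illustration


def load_params():
    # Each refactored test module reads its own *_params.json file;
    # "valid_data" is the suggested happy-path node.
    with open('shared_functions_params.json') as params_file:
        return json.load(params_file)['valid_data']


def test_append_id_to_file_name_single_identifier_success():
    params = load_params()

    # Bare asserts replace unittest's self.assertEqual and friends;
    # the keyword argument names here are assumptions.
    result = append_id_to_file_name(file_name=params['file_name'], identifier=params['identifier'])
    assert result == params['expected_file_name']
```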

+
+## v4.1.1.0 - 2023-02-16 - [PR#809](https://github.com/NOAA-OWP/inundation-mapping/pull/809)
+
+The CatFIM code was updated to allow 1-foot interval processing across all stage-based AHPS sites ranging from action stage to 5 feet above major stage, along with restart capability for interrupted processing runs.
+
+### Changes
+
+- `tools/generate_categorical_fim.py` (all changes made here)
+ - Added try-except blocks around code that was preventing most sites from being processed because it checked values of USGS-related variables that most sites do not have.
+ - Overwrite behavior for the different viz-team outputs was inconsistent (i.e., one of the files could be overwritten but another could not), so it has been made consistent: no overwrites of the existing final outputs are allowed for a specified output folder.
+ - The code can also restart from an interrupted run and resume processing uncompleted HUCs by first checking for a simple "complete" file for each HUC. If a HUC has that file, it is skipped (because it already completed processing during a run for that particular output folder / run name).
+ - When a HUC is successfully processed, an empty "complete" text file is created / touched.
+
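A minimal sketch of the restart-marker behavior described above (the function and file names here are hypothetical, not the actual implementation in `generate_categorical_fim.py`):

```python
import os


def huc_is_complete(output_dir, huc):
    # A HUC is skipped when its "complete" marker already exists from a previous run.
    return os.path.exists(os.path.join(output_dir, huc, 'complete'))


def mark_huc_complete(output_dir, huc):
    # Touch an empty "complete" file once the HUC finishes processing.
    open(os.path.join(output_dir, huc, 'complete'), 'w').close()


def process_hucs(output_dir, hucs):
    for huc in hucs:
        if huc_is_complete(output_dir, huc):
            continue  # restart support: skip HUCs already finished in an earlier run
        # ... generate the stage-based CatFIM products for this HUC here ...
        mark_huc_complete(output_dir, huc)
```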

## v4.1.0.0 - 2023-01-30 - [PR#806](https://github.com/NOAA-OWP/inundation-mapping/pull/806)