FIM 4 USGS gage crosswalk and Sierra Test

See Changelog v4.0.3.0 for full details. This resolves #539.
NOAA-OWP · Mar 4, 2022 · b11f981 · b11f981
1 parent 45c7d6c
commit b11f981
Show file tree

Hide file tree

Showing 15 changed files with 1,036 additions and 150 deletions.
diff --git a/config/deny_gms_branches_default.lst b/config/deny_gms_branches_default.lst
@@ -56,3 +56,4 @@ src_base_{}.csv
 stage_{}.txt
 streamOrder_{}.tif
 treeFile_{}.txt
+#usgs_elev_table.csv
diff --git a/config/deny_gms_branches_min.lst b/config/deny_gms_branches_min.lst
@@ -56,3 +56,4 @@ src_base_{}.csv
 stage_{}.txt
 streamOrder_{}.tif
 treeFile_{}.txt
+#usgs_elev_table.csv
diff --git a/config/deny_gms_unit_default.lst b/config/deny_gms_unit_default.lst
@@ -13,6 +13,7 @@ nwm_subset_streams.gpkg
 #nwm_subset_streams_levelPaths.gpkg
 #nwm_subset_streams_levelPaths_dissolved.gpkg
 #nwm_subset_streams_levelPaths_dissolved_headwaters.gpkg
+#usgs_subset_gages.gpkg
 #wbd.gpkg
 #wbd8_clp.gpkg
 wbd_buffered.gpkg
diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md
@@ -1,6 +1,33 @@
 All notable changes to this project will be documented in this file.
 We follow the [Semantic Versioning 2.0.0](http://semver.org/) format.
 
+## v4.0.3.0 - 2022-03-03 - [PR #550](https://github.com/NOAA-OWP/inundation-mapping/pull/550)
+
+This PR ports the functionality of `usgs_gage_crosswalk.py` and `rating_curve_comparison.py` to FIM 4.
+
+## Additions
+
+- `src/`:
+    - `usgs_gage_aggregate.py`: Aggregates all instances of `usgs_elev_table.csv` to the HUC level. This makes it easier to view the gages in each HUC without having to hunt through branch folders and easier for the Sierra Test to run at the HUC level.
+    - `usgs_gage_unit_setup.py`: Assigns a branch to each USGS gage within a unit. The output of this module is `usgs_subset_gages.gpkg` at the HUC level containing the `levpa_id` attribute.
+
+## Changes
+
+- `gms_run_branch.sh`: Added a line to aggregate all `usgs_elev_table.csv` into the HUC directory level using `src/usgs_gage_aggregate.py`.
+- `src/`:
+    - `gms/`:
+        - `run_by_branch.sh`: Added a block to run `src/usgs_gage_crosswalk.py`.
+        - `run_by_unit.sh`: Added a block to run `src/usgs_gage_unit_setup.py`.
+    - `usgs_gage_crosswalk.py`: Similar to its functionality in FIM 3, this module snaps USGS gages to the stream network, samples the underlying DEMs, and writes the attributes to `usgs_elev_table.csv`. This CSV is later aggregated to the HUC level and eventually used in `tools/rating_curve_comparison.py`. Addresses #539.
+- `tools/rating_curve_comparison.py`: Updated Sierra Test to work with FIM 4 data structure.
+- `unit_tests/`:
+    - `rating_curve_comparison_unittests.py` & `rating_curve_comparison_params.json`: Unit test code and parameters for the Sierra Test.
+    - `usgs_gage_crosswalk_unittests.py` & `usgs_gage_crosswalk_params.json`: Unit test code and parameters for `usgs_gage_crosswalk.py`
+- `config/`:
+    - `deny_gms_branches_default.lst` & `deny_gms_branches_min.lst`: Add `usgs_elev_table.csv` to the lists as a comment so it doesn't get deleted during cleanup.
+    - `deny_gms_unit_default.lst`: Add `usgs_subset_gages.gpkg` to the list as a comment so it doesn't get deleted during cleanup.
+
+<br/><br/>
 
 ## v4.0.2.0 - 2022-03-02 - [PR #548](https://github.com/NOAA-OWP/inundation-mapping/pull/548)
 

diff --git a/gms_run_branch.sh b/gms_run_branch.sh
@@ -131,6 +131,8 @@ else
     parallel $retry --eta --timeout $branch_timeout -j $jobLimit --joblog $logFile --colsep ',' -- $srcDir/gms/time_and_tee_run_by_branch.sh :::: $gms_inputs
 fi
 
+## RUN AGGREGATE BRANCH ELEV TABLES ##
+python3 $srcDir/usgs_gage_aggregate.py -fim $outputRunDataDir -gms $gms_inputs
 
 # -------------------
 ## GET NON ZERO EXIT CODES ##

diff --git a/src/gms/run_by_branch.sh b/src/gms/run_by_branch.sh
@@ -282,6 +282,15 @@ Tstart
 python3 -m memory_profiler $srcDir/add_crosswalk.py -d $outputCurrentBranchDataDir/gw_catchments_reaches_filtered_addedAttributes_$current_branch_id.gpkg -a $outputCurrentBranchDataDir/demDerived_reaches_split_filtered_$current_branch_id.gpkg -s $outputCurrentBranchDataDir/src_base_$current_branch_id.csv -l $outputCurrentBranchDataDir/gw_catchments_reaches_filtered_addedAttributes_crosswalked_$current_branch_id.gpkg -f $outputCurrentBranchDataDir/demDerived_reaches_split_filtered_addedAttributes_crosswalked_$current_branch_id.gpkg -r $outputCurrentBranchDataDir/src_full_crosswalked_$current_branch_id.csv -j $outputCurrentBranchDataDir/src_$current_branch_id.json -x $outputCurrentBranchDataDir/crosswalk_table_$current_branch_id.csv -t $outputCurrentBranchDataDir/hydroTable_$current_branch_id.csv -w $outputHucDataDir/wbd8_clp.gpkg -b $outputCurrentBranchDataDir/nwm_subset_streams_levelPaths_$current_branch_id.gpkg -y $outputCurrentBranchDataDir/nwm_catchments_proj_subset.tif -m $manning_n -z $outputCurrentBranchDataDir/nwm_catchments_proj_subset_levelPaths_$current_branch_id.gpkg -p $extent -k $outputCurrentBranchDataDir/small_segments_$current_branch_id.csv
 Tcount
 
+## USGS CROSSWALK ##
+if [ -f $outputHucDataDir/usgs_subset_gages.gpkg ]; then
+    echo -e $startDiv"USGS Crosswalk $hucNumber $current_branch_id"$stopDiv
+    date -u
+    Tstart
+    python3 $srcDir/usgs_gage_crosswalk.py -gages $outputHucDataDir/usgs_subset_gages.gpkg -flows $outputCurrentBranchDataDir/demDerived_reaches_split_filtered_$current_branch_id.gpkg -cat $outputCurrentBranchDataDir/gw_catchments_reaches_filtered_addedAttributes_crosswalked_$current_branch_id.gpkg -dem $outputCurrentBranchDataDir/dem_meters_$current_branch_id.tif -dem_adj $outputCurrentBranchDataDir/dem_thalwegCond_$current_branch_id.tif -outtable $outputCurrentBranchDataDir/usgs_elev_table.csv -b $current_branch_id
+    Tcount
+fi
+
 ## REMOVE FILES FROM DENY LIST ##
 if [ -f $deny_gms_branches_list ]; then
     echo -e $startDiv"Remove files $hucNumber $current_branch_id"$stopDiv

diff --git a/src/gms/run_by_unit.sh b/src/gms/run_by_unit.sh
@@ -84,6 +84,13 @@ Tstart
 $srcDir/gms/generate_branch_list.py -o $outputHucDataDir/branch_id.lst -d $outputHucDataDir/nwm_subset_streams_levelPaths_dissolved.gpkg -b $branch_id_attribute
 Tcount
 
+## CREATE USGS GAGES FILE
+echo -e $startDiv"Assigning USGS gages to branches for $hucNumber"$stopDiv
+date -u
+Tstart
+python3 -m memory_profiler $srcDir/usgs_gage_unit_setup.py -gages $inputDataDir/usgs_gages/usgs_gages.gpkg -nwm $outputHucDataDir/nwm_subset_streams_levelPaths.gpkg -o $outputHucDataDir/usgs_subset_gages.gpkg -huc $hucNumber -ahps $inputDataDir/ahps_sites/nws_lid.gpkg
+Tcount
+
 ## REMOVE FILES FROM DENY LIST ##
 if [ -f $deny_gms_unit_list ]; then
     echo -e $startDiv"Remove files $hucNumber"$stopDiv

diff --git a/src/usgs_gage_aggregate.py b/src/usgs_gage_aggregate.py
@@ -0,0 +1,98 @@
+#!/usr/bin/env python3
+
+import os
+from os.path import join
+import pandas as pd
+import re
+import argparse
+
+class HucDirectory(object):
+    """Aggregate branch-level outputs of one HUC directory up to the HUC level."""
+
+    def __init__(self, path, limit_branches=None):
+        """Store the HUC path and optional branch subset; set up the empty aggregate."""
+        self.dir = path
+        self.name = os.path.basename(path)
+        # A None default avoids sharing one mutable list between instances.
+        self.limit_branches = [] if limit_branches is None else limit_branches
+
+        self.usgs_dtypes = {'location_id':str,
+                            'nws_lid':str,
+                            'feature_id':int,
+                            'HydroID':int,
+                            'levpa_id':str,
+                            'dem_elevation':float,
+                            'dem_adj_elevation':float,
+                            'order_':int,
+                            'LakeID':object,
+                            'HUC8':str,
+                            'snap_distance':float}
+        self.agg_usgs_elev_table = pd.DataFrame(columns=list(self.usgs_dtypes.keys()))
+
+    def iter_branches(self):
+        """Yield (branch, path) for each limited branch, or for every entry in branches/."""
+        branches_dir = join(self.dir, 'branches')
+        for branch in self.limit_branches or os.listdir(branches_dir):
+            yield (branch, join(branches_dir, branch))
+
+    def usgs_elev_table(self, branch_path):
+        """Append the branch's usgs_elev_table.csv, if present, to the HUC aggregate."""
+        usgs_elev_filename = join(branch_path, 'usgs_elev_table.csv')
+        if not os.path.isfile(usgs_elev_filename):
+            return
+
+        usgs_elev_table = pd.read_csv(usgs_elev_filename, dtype=self.usgs_dtypes)
+        # DataFrame.append was removed in pandas 2.0; pd.concat is the equivalent.
+        self.agg_usgs_elev_table = pd.concat([self.agg_usgs_elev_table, usgs_elev_table])
+
+    def agg_function(self):
+        """Visit each branch, then rewrite the HUC-level aggregate file(s)."""
+        for branch_id, branch_path in self.iter_branches():
+
+            self.usgs_elev_table(branch_path)
+
+            ## Other aggregate functions can go here
+
+        ## After all of the branches are visited, the code below will write the aggregates.
+        ## Any stale aggregate is removed first, so no file remains when nothing was found.
+        agg_filename = join(self.dir, 'usgs_elev_table.csv')
+        if os.path.isfile(agg_filename):
+            os.remove(agg_filename)
+
+        if not self.agg_usgs_elev_table.empty:
+            self.agg_usgs_elev_table.to_csv(agg_filename, index=False)
+
+
+
+if __name__ == '__main__':
+
+    # Aggregate branch-level usgs_elev_table.csv files for the HUCs listed in the
+    # gms_inputs CSV when given, otherwise for every HUC folder in the FIM directory.
+    parser = argparse.ArgumentParser(description='Aggregates usgs_elev_table.csv at the HUC level')
+    parser.add_argument('-fim','--fim_directory', help='Input FIM Directory', required=True)
+    parser.add_argument('-gms','--gms_inputs', help='Input gms_inputs CSV file', required=False)
+
+    args = vars(parser.parse_args())
+
+    fim_directory = args['fim_directory']
+    gms_inputs = args['gms_inputs']
+    assert os.path.isdir(fim_directory), f'{fim_directory} is not a valid directory'
+
+    if gms_inputs:
+        # Headerless two-column CSV (huc, levpa_id); dtype=str keeps the IDs as text.
+        gms_table = pd.read_csv(gms_inputs, header=None, names=['huc', 'levpa_id'],dtype=str)
+        for huc_id in gms_table.huc.unique():
+            branches = gms_table.loc[gms_table.huc == huc_id, 'levpa_id'].tolist()
+            HucDirectory(join(fim_directory, huc_id), limit_branches=branches).agg_function()
+
+    else:
+        # Raw string keeps \d a regex escape; match() anchors only the start of the name.
+        huc_pattern = re.compile(r'\d{8}')
+        for huc_dir in filter(huc_pattern.match, os.listdir(fim_directory)):
+            HucDirectory(join(fim_directory, huc_dir)).agg_function()
+
+
+
+
+
+