Remove build_lookup_from_csv and consolidate into read_csv_to_dataframe #1334

Merged: 11 commits, Jun 22, 2023
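
This PR applies one mechanical pattern across the models: each call to the deprecated utils.build_lookup_from_csv is replaced by utils.read_csv_to_dataframe followed by to_dict(orient='index'), which produces the same nested dictionary keyed by the index column. A minimal before/after sketch of that pattern; the table path 'table.csv' and key column 'lucode' are illustrative placeholders, not values from this diff:

# Sketch only: 'table.csv' and 'lucode' are placeholder names.
from natcap.invest import utils

# Old (deprecated) helper returned {index_value: {column: value, ...}}:
#     lookup = utils.build_lookup_from_csv('table.csv', 'lucode')

# New equivalent: read a DataFrame indexed by the key column, then
# convert it to the same nested-dict structure.
lookup = utils.read_csv_to_dataframe(
    'table.csv', 'lucode').to_dict(orient='index')
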
8 changes: 6 additions & 2 deletions HISTORY.rst
@@ -44,12 +44,16 @@ Unreleased Changes
* Updated the package installation instructions in the API docs for clarity
and also to highlight the ease of installation through ``conda-forge``.
https://github.com/natcap/invest/issues/1256
* ``utils.build_lookup_from_csv`` now accepts kwargs for ``pandas.read_csv``
(`#1319 <https://github.com/natcap/invest/issues/1319>`_)
* ``utils.build_lookup_from_csv`` has been deprecated and its functionality
has been merged into ``utils.read_csv_to_dataframe``
(`#1319 <https://github.com/natcap/invest/issues/1319>`_),
(`#1327 <https://github.com/natcap/invest/issues/1327>`_)
* Workbench
* Fixed a bug where sampledata downloads failed silently (and progress bar
      became inaccurate) if the Workbench did not have write permission to
the download location. https://github.com/natcap/invest/issues/1070
* Forest Carbon
* The biophysical table is now case-insensitive.
* HRA
* Fixed a bug in HRA where the model would error when all exposure and
consequence criteria were skipped for a single habitat. The model now
12 changes: 6 additions & 6 deletions src/natcap/invest/annual_water_yield.py
@@ -517,8 +517,8 @@ def execute(args):
'Checking that watersheds have entries for every `ws_id` in the '
'valuation table.')
# Open/read in valuation parameters from CSV file
valuation_params = utils.build_lookup_from_csv(
args['valuation_table_path'], 'ws_id')
valuation_params = utils.read_csv_to_dataframe(
args['valuation_table_path'], 'ws_id').to_dict(orient='index')
watershed_vector = gdal.OpenEx(
args['watersheds_path'], gdal.OF_VECTOR)
watershed_layer = watershed_vector.GetLayer()
@@ -636,15 +636,15 @@ def execute(args):
'lulc': pygeoprocessing.get_raster_info(clipped_lulc_path)['nodata'][0]}

# Open/read in the csv file into a dictionary and add to arguments
bio_dict = utils.build_lookup_from_csv(
args['biophysical_table_path'], 'lucode', to_lower=True)
bio_dict = utils.read_csv_to_dataframe(
args['biophysical_table_path'], 'lucode').to_dict(orient='index')
bio_lucodes = set(bio_dict.keys())
bio_lucodes.add(nodata_dict['lulc'])
LOGGER.debug(f'bio_lucodes: {bio_lucodes}')

if 'demand_table_path' in args and args['demand_table_path'] != '':
demand_dict = utils.build_lookup_from_csv(
args['demand_table_path'], 'lucode')
demand_dict = utils.read_csv_to_dataframe(
args['demand_table_path'], 'lucode').to_dict(orient='index')
demand_reclassify_dict = dict(
[(lucode, demand_dict[lucode]['demand'])
for lucode in demand_dict])
4 changes: 2 additions & 2 deletions src/natcap/invest/carbon.py
@@ -366,8 +366,8 @@ def execute(args):
(_INTERMEDIATE_BASE_FILES, intermediate_output_dir),
(_TMP_BASE_FILES, output_dir)], file_suffix)

carbon_pool_table = utils.build_lookup_from_csv(
args['carbon_pools_path'], 'lucode')
carbon_pool_table = utils.read_csv_to_dataframe(
args['carbon_pools_path'], 'lucode').to_dict(orient='index')

work_token_dir = os.path.join(
intermediate_output_dir, '_taskgraph_working_dir')
15 changes: 8 additions & 7 deletions src/natcap/invest/coastal_blue_carbon/coastal_blue_carbon.py
@@ -584,8 +584,8 @@ def execute(args):

# We're assuming that the LULC initial variables and the carbon pool
# transient table are combined into a single lookup table.
biophysical_parameters = utils.build_lookup_from_csv(
args['biophysical_table_path'], 'code')
biophysical_parameters = utils.read_csv_to_dataframe(
args['biophysical_table_path'], 'code').to_dict(orient='index')

# LULC Classnames are critical to the transition mapping, so they must be
# unique. This check is here in ``execute`` because it's possible that
@@ -964,8 +964,8 @@ def execute(args):
if args.get('use_price_table', False):
prices = {
year: values['price'] for (year, values) in
utils.build_lookup_from_csv(
args['price_table_path'], 'year').items()}
utils.read_csv_to_dataframe(
args['price_table_path'], 'year'
).to_dict(orient='index').items()}
else:
inflation_rate = float(args['inflation_rate']) * 0.01
annual_price = float(args['price'])
@@ -1985,7 +1986,8 @@ def _read_transition_matrix(transition_csv_path, biophysical_dict):
landcover transition, and the second contains accumulation rates for
the pool for the landcover transition.
"""
table = utils.read_csv_to_dataframe(transition_csv_path, index_col=False)
table = utils.read_csv_to_dataframe(
transition_csv_path, cols_to_lower=False, vals_to_lower=False)

lulc_class_to_lucode = {}
max_lucode = 0
@@ -2239,8 +2241,7 @@ def _extract_snapshots_from_table(csv_path):

"""
table = utils.read_csv_to_dataframe(
csv_path, to_lower=True, index_col=False,
expand_path_cols=['raster_path'])
csv_path, vals_to_lower=False, expand_path_cols=['raster_path'])

output_dict = {}
table.set_index("snapshot_year", drop=False, inplace=True)
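
Two keyword-argument patterns recur in the hunks above and below: cols_to_lower and vals_to_lower keep column names or values in their original case, and expand_path_cols expands relative paths in the named columns. A short sketch of both, assuming placeholder file names ('transitions.csv' and 'snapshots.csv' are not from this PR; the keyword names are taken from the diff):

from natcap.invest import utils

# Preserve the original case of column names and values
# ('transitions.csv' is a placeholder file name).
transitions = utils.read_csv_to_dataframe(
    'transitions.csv', cols_to_lower=False, vals_to_lower=False)

# Expand relative paths in the 'raster_path' column to absolute paths
# ('snapshots.csv' is a placeholder file name).
snapshots = utils.read_csv_to_dataframe(
    'snapshots.csv', vals_to_lower=False,
    expand_path_cols=['raster_path'])
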
4 changes: 2 additions & 2 deletions src/natcap/invest/coastal_blue_carbon/preprocessor.py
@@ -209,8 +209,8 @@ def execute(args):
target_path_list=aligned_snapshot_paths,
task_name='Align input landcover rasters')

landcover_table = utils.build_lookup_from_csv(
args['lulc_lookup_table_path'], 'code')
landcover_table = utils.read_csv_to_dataframe(
args['lulc_lookup_table_path'], 'code').to_dict(orient='index')

target_transition_table = os.path.join(
output_dir, TRANSITION_TABLE.format(suffix=suffix))
8 changes: 5 additions & 3 deletions src/natcap/invest/coastal_vulnerability.py
@@ -2315,7 +2315,7 @@ def _schedule_habitat_tasks(

"""
habitat_dataframe = utils.read_csv_to_dataframe(
habitat_table_path, to_lower=True, expand_path_cols=['path'])
habitat_table_path, vals_to_lower=False, expand_path_cols=['path'])
habitat_dataframe = habitat_dataframe.rename(
columns={'protection distance (m)': 'distance'})

@@ -2834,7 +2834,8 @@ def assemble_results_and_calculate_exposure(
with open(pickle_path, 'rb') as file:
final_values_dict[var_name] = pickle.load(file)

habitat_df = utils.read_csv_to_dataframe(habitat_protection_path)
habitat_df = utils.read_csv_to_dataframe(
habitat_protection_path, cols_to_lower=False, vals_to_lower=False)
output_layer.StartTransaction()
for feature in output_layer:
shore_id = feature.GetField(SHORE_ID_FIELD)
@@ -3464,7 +3465,8 @@ def _validate_habitat_table_paths(habitat_table_path):
ValueError if any vector in the ``path`` column cannot be opened.
"""
habitat_dataframe = utils.read_csv_to_dataframe(
habitat_table_path, expand_path_cols=['path'])
habitat_table_path, cols_to_lower=False, vals_to_lower=False,
expand_path_cols=['path'])
bad_paths = []
for habitat_row in habitat_dataframe.itertuples():
try:
13 changes: 7 additions & 6 deletions src/natcap/invest/crop_production_percentile.py
@@ -458,8 +458,8 @@ def execute(args):
None.

"""
crop_to_landcover_table = utils.build_lookup_from_csv(
args['landcover_to_crop_table_path'], 'crop_name', to_lower=True)
crop_to_landcover_table = utils.read_csv_to_dataframe(
args['landcover_to_crop_table_path'], 'crop_name').to_dict(orient='index')
bad_crop_name_list = []
for crop_name in crop_to_landcover_table:
crop_climate_bin_raster_path = os.path.join(
@@ -540,8 +540,8 @@ def execute(args):
climate_percentile_yield_table_path = os.path.join(
args['model_data_path'],
_CLIMATE_PERCENTILE_TABLE_PATTERN % crop_name)
crop_climate_percentile_table = utils.build_lookup_from_csv(
climate_percentile_yield_table_path, 'climate_bin', to_lower=True)
crop_climate_percentile_table = utils.read_csv_to_dataframe(
climate_percentile_yield_table_path, 'climate_bin').to_dict(orient='index')
yield_percentile_headers = [
x for x in list(crop_climate_percentile_table.values())[0]
if x != 'climate_bin']
@@ -698,9 +698,10 @@ def execute(args):

# both 'crop_nutrient.csv' and 'crop' are known data/header values for
# this model data.
nutrient_table = utils.build_lookup_from_csv(
nutrient_table = utils.read_csv_to_dataframe(
os.path.join(args['model_data_path'], 'crop_nutrient.csv'),
'crop', to_lower=False)
'crop', cols_to_lower=False, vals_to_lower=False
).to_dict(orient='index')
result_table_path = os.path.join(
output_dir, 'result_table%s.csv' % file_suffix)

17 changes: 9 additions & 8 deletions src/natcap/invest/crop_production_regression.py
@@ -484,11 +484,11 @@ def execute(args):

LOGGER.info(
"Checking if the landcover raster is missing lucodes")
crop_to_landcover_table = utils.build_lookup_from_csv(
args['landcover_to_crop_table_path'], 'crop_name', to_lower=True)
crop_to_landcover_table = utils.read_csv_to_dataframe(
args['landcover_to_crop_table_path'], 'crop_name').to_dict(orient='index')

crop_to_fertlization_rate_table = utils.build_lookup_from_csv(
args['fertilization_rate_table_path'], 'crop_name', to_lower=True)
crop_to_fertlization_rate_table = utils.read_csv_to_dataframe(
args['fertilization_rate_table_path'], 'crop_name').to_dict(orient='index')

crop_lucodes = [
x[_EXPECTED_LUCODE_TABLE_HEADER]
@@ -571,8 +571,8 @@ def execute(args):
crop_regression_table_path = os.path.join(
args['model_data_path'], _REGRESSION_TABLE_PATTERN % crop_name)

crop_regression_table = utils.build_lookup_from_csv(
crop_regression_table_path, 'climate_bin', to_lower=True)
crop_regression_table = utils.read_csv_to_dataframe(
crop_regression_table_path, 'climate_bin').to_dict(orient='index')
for bin_id in crop_regression_table:
for header in _EXPECTED_REGRESSION_TABLE_HEADERS:
if crop_regression_table[bin_id][header.lower()] == '':
@@ -796,9 +796,10 @@ def execute(args):

# both 'crop_nutrient.csv' and 'crop' are known data/header values for
# this model data.
nutrient_table = utils.build_lookup_from_csv(
nutrient_table = utils.read_csv_to_dataframe(
os.path.join(args['model_data_path'], 'crop_nutrient.csv'),
'crop', to_lower=False)
'crop', cols_to_lower=False, vals_to_lower=False
).to_dict(orient='index')

LOGGER.info("Generating report table")
result_table_path = os.path.join(
2 changes: 1 addition & 1 deletion src/natcap/invest/datastack.py
@@ -336,7 +336,7 @@ def build_datastack_archive(args, model_name, datastack_path):
data_dir, f'{key}_csv_data')

dataframe = utils.read_csv_to_dataframe(
source_path, to_lower=True)
source_path, vals_to_lower=False)
csv_source_dir = os.path.abspath(os.path.dirname(source_path))
for spatial_column_name in spatial_columns:
# Iterate through the spatial columns, identify the set of
12 changes: 6 additions & 6 deletions src/natcap/invest/forest_carbon_edge_effect.py
@@ -418,8 +418,8 @@ def execute(args):
# Map non-forest landcover codes to carbon biomasses
LOGGER.info('Calculating direct mapped carbon stocks')
carbon_maps = []
biophysical_table = utils.build_lookup_from_csv(
args['biophysical_table_path'], 'lucode', to_lower=False)
biophysical_table = utils.read_csv_to_dataframe(
args['biophysical_table_path'], 'lucode').to_dict(orient='index')
biophysical_keys = [
x.lower() for x in list(biophysical_table.values())[0].keys()]
pool_list = [('c_above', True)]
@@ -630,8 +630,8 @@ def _calculate_lulc_carbon_map(

"""
# classify forest pixels from lulc
biophysical_table = utils.build_lookup_from_csv(
biophysical_table_path, 'lucode', to_lower=False)
biophysical_table = utils.read_csv_to_dataframe(
biophysical_table_path, 'lucode').to_dict(orient='index')

lucode_to_per_cell_carbon = {}
cell_size = pygeoprocessing.get_raster_info(
@@ -696,8 +696,8 @@ def _map_distance_from_tropical_forest_edge(

"""
# Build a list of forest lucodes
biophysical_table = utils.build_lookup_from_csv(
biophysical_table_path, 'lucode', to_lower=False)
biophysical_table = utils.read_csv_to_dataframe(
biophysical_table_path, 'lucode').to_dict(orient='index')
forest_codes = [
lucode for (lucode, ludata) in biophysical_table.items()
if int(ludata['is_tropical_forest']) == 1]
22 changes: 12 additions & 10 deletions src/natcap/invest/habitat_quality.py
@@ -380,11 +380,12 @@ def execute(args):
LOGGER.info("Checking Threat and Sensitivity tables for compliance")
# Get CSVs as dictionaries and ensure the key is a string for threats.
threat_dict = {
str(key): value for key, value in utils.build_lookup_from_csv(
args['threats_table_path'], 'THREAT', to_lower=True,
expand_path_cols=['cur_path', 'fut_path', 'base_path']).items()}
sensitivity_dict = utils.build_lookup_from_csv(
args['sensitivity_table_path'], 'LULC', to_lower=True)
str(key): value for key, value in utils.read_csv_to_dataframe(
args['threats_table_path'], 'THREAT',
expand_path_cols=['cur_path', 'fut_path', 'base_path']
).to_dict(orient='index').items()}
sensitivity_dict = utils.read_csv_to_dataframe(
args['sensitivity_table_path'], 'LULC').to_dict(orient='index')

half_saturation_constant = float(args['half_saturation_constant'])

@@ -1156,11 +1157,12 @@ def validate(args, limit_to=None):

# Get CSVs as dictionaries and ensure the key is a string for threats.
threat_dict = {
str(key): value for key, value in utils.build_lookup_from_csv(
args['threats_table_path'], 'THREAT', to_lower=True,
expand_path_cols=['cur_path', 'fut_path', 'base_path']).items()}
sensitivity_dict = utils.build_lookup_from_csv(
args['sensitivity_table_path'], 'LULC', to_lower=True)
str(key): value for key, value in utils.read_csv_to_dataframe(
args['threats_table_path'], 'THREAT',
expand_path_cols=['cur_path', 'fut_path', 'base_path']
).to_dict(orient='index').items()}
sensitivity_dict = utils.read_csv_to_dataframe(
args['sensitivity_table_path'], 'LULC').to_dict(orient='index')

# check that the threat names in the threats table match with the
# threats columns in the sensitivity table.
3 changes: 2 additions & 1 deletion src/natcap/invest/hra.py
@@ -1845,7 +1845,8 @@ def _open_table_as_dataframe(table_path, **kwargs):
return excel_df
else:
return utils.read_csv_to_dataframe(
table_path, to_lower=True, expand_path_cols=['path'], **kwargs)
table_path, vals_to_lower=False,
expand_path_cols=['path'], **kwargs)


def _parse_info_table(info_table_path):
4 changes: 2 additions & 2 deletions src/natcap/invest/ndr/ndr.py
@@ -619,8 +619,8 @@ def _validate_inputs(nutrients_to_process, lucode_to_parameters):
if args['calc_' + nutrient_id]:
nutrients_to_process.append(nutrient_id)

lucode_to_parameters = utils.build_lookup_from_csv(
args['biophysical_table_path'], 'lucode')
lucode_to_parameters = utils.read_csv_to_dataframe(
args['biophysical_table_path'], 'lucode').to_dict(orient='index')

_validate_inputs(nutrients_to_process, lucode_to_parameters)

8 changes: 4 additions & 4 deletions src/natcap/invest/pollination.py
@@ -1179,8 +1179,8 @@ def _parse_scenario_variables(args):
else:
farm_vector_path = None

guild_table = utils.build_lookup_from_csv(
guild_table_path, 'species', to_lower=True)
guild_table = utils.read_csv_to_dataframe(
guild_table_path, 'species').to_dict(orient='index')

LOGGER.info('Checking to make sure guild table has all expected headers')
guild_headers = list(guild_table.values())[0].keys()
@@ -1192,8 +1192,8 @@
f"'{header}' but was unable to find one. Here are all the "
f"headers from {guild_table_path}: {', '.join(guild_headers)}")

landcover_biophysical_table = utils.build_lookup_from_csv(
landcover_biophysical_table_path, 'lucode', to_lower=True)
landcover_biophysical_table = utils.read_csv_to_dataframe(
landcover_biophysical_table_path, 'lucode').to_dict(orient='index')
biophysical_table_headers = (
list(landcover_biophysical_table.values())[0].keys())
for header in _EXPECTED_BIOPHYSICAL_HEADERS:
20 changes: 11 additions & 9 deletions src/natcap/invest/recreation/recmodel_client.py
@@ -853,8 +853,9 @@ def _schedule_predictor_data_processing(
'line_intersect_length': _line_intersect_length,
}

predictor_table = utils.build_lookup_from_csv(
predictor_table_path, 'id', expand_path_cols=['path'])
predictor_table = utils.read_csv_to_dataframe(
predictor_table_path, 'id', expand_path_cols=['path']
).to_dict(orient='index')
predictor_task_list = []
predictor_json_list = [] # tracks predictor files to add to shp

@@ -1546,7 +1547,8 @@ def _validate_same_id_lengths(table_path):
tables.

"""
predictor_table = utils.build_lookup_from_csv(table_path, 'id')
predictor_table = utils.read_csv_to_dataframe(
table_path, 'id').to_dict(orient='index')
too_long = set()
for p_id in predictor_table:
if len(p_id) > 10:
@@ -1579,11 +1581,11 @@ def _validate_same_ids_and_types(
tables.

"""
predictor_table = utils.build_lookup_from_csv(
predictor_table_path, 'id')
predictor_table = utils.read_csv_to_dataframe(
predictor_table_path, 'id').to_dict(orient='index')

scenario_predictor_table = utils.build_lookup_from_csv(
scenario_predictor_table_path, 'id')
scenario_predictor_table = utils.read_csv_to_dataframe(
scenario_predictor_table_path, 'id').to_dict(orient='index')

predictor_table_pairs = set([
(p_id, predictor_table[p_id]['type'].strip()) for p_id in predictor_table])
@@ -1616,7 +1618,7 @@ def _validate_same_projection(base_vector_path, table_path):
# This will load the table as a list of paths which we can iterate through
# without bothering the rest of the table structure
data_paths = utils.read_csv_to_dataframe(
table_path, to_lower=True, expand_path_cols=['path']
table_path, vals_to_lower=False, expand_path_cols=['path']
).squeeze('columns')['path'].tolist()

base_vector = gdal.OpenEx(base_vector_path, gdal.OF_VECTOR)
@@ -1673,7 +1675,7 @@ def _validate_predictor_types(table_path):
ValueError if any value in the ``type`` column does not match a valid
type, ignoring leading/trailing whitespace.
"""
df = utils.read_csv_to_dataframe(table_path, to_lower=True)
df = utils.read_csv_to_dataframe(table_path, vals_to_lower=False)
# ignore leading/trailing whitespace because it will be removed
# when the type values are used
type_list = set([type.strip() for type in df['type']])