Remove build_lookup_from_csv and consolidate into read_csv_to_dataframe #1334

Merged: 11 commits, Jun 22, 2023
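
This PR applies one mechanical pattern across the models: each call to the deprecated utils.build_lookup_from_csv is replaced by utils.read_csv_to_dataframe followed by to_dict(orient='index'), which produces the same nested dictionary keyed by the index column. A minimal before/after sketch of that pattern; the table path 'table.csv' and key column 'lucode' are illustrative placeholders, not values from this diff:

# Sketch only: 'table.csv' and 'lucode' are placeholder names.
from natcap.invest import utils

# Old (deprecated) helper returned {index_value: {column: value, ...}}:
#     lookup = utils.build_lookup_from_csv('table.csv', 'lucode')

# New equivalent: read a DataFrame indexed by the key column, then
# convert it to the same nested-dict structure.
lookup = utils.read_csv_to_dataframe(
    'table.csv', 'lucode').to_dict(orient='index')
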
8 changes: 6 additions & 2 deletions HISTORY.rst
@@ -44,12 +44,16 @@ Unreleased Changes
* Updated the package installation instructions in the API docs for clarity
and also to highlight the ease of installation through ``conda-forge``.
https://github.com/natcap/invest/issues/1256
* ``utils.build_lookup_from_csv`` now accepts kwargs for ``pandas.read_csv``
(`#1319 <https://github.com/natcap/invest/issues/1319>`_)
* ``utils.build_lookup_from_csv`` has been deprecated and its functionality
has been merged into ``utils.read_csv_to_dataframe``
(`#1319 <https://github.com/natcap/invest/issues/1319>`_),
(`#1327 <https://github.com/natcap/invest/issues/1327>`_)
* Workbench
* Fixed a bug where sampledata downloads failed silently (and progress bar
      became inaccurate) if the Workbench did not have write permission to
the download location. https://github.com/natcap/invest/issues/1070
* Forest Carbon
* The biophysical table is now case-insensitive.
* HRA
* Fixed a bug in HRA where the model would error when all exposure and
consequence criteria were skipped for a single habitat. The model now
12 changes: 6 additions & 6 deletions src/natcap/invest/annual_water_yield.py
@@ -517,8 +517,8 @@ def execute(args):
'Checking that watersheds have entries for every `ws_id` in the '
'valuation table.')
# Open/read in valuation parameters from CSV file
valuation_params = utils.build_lookup_from_csv(
args['valuation_table_path'], 'ws_id')
valuation_params = utils.read_csv_to_dataframe(
args['valuation_table_path'], 'ws_id').to_dict(orient='index')
watershed_vector = gdal.OpenEx(
args['watersheds_path'], gdal.OF_VECTOR)
watershed_layer = watershed_vector.GetLayer()
@@ -636,15 +636,15 @@ def execute(args):
'lulc': pygeoprocessing.get_raster_info(clipped_lulc_path)['nodata'][0]}

# Open/read in the csv file into a dictionary and add to arguments
bio_dict = utils.build_lookup_from_csv(
args['biophysical_table_path'], 'lucode', to_lower=True)
bio_dict = utils.read_csv_to_dataframe(
args['biophysical_table_path'], 'lucode').to_dict(orient='index')
bio_lucodes = set(bio_dict.keys())
bio_lucodes.add(nodata_dict['lulc'])
LOGGER.debug(f'bio_lucodes: {bio_lucodes}')

if 'demand_table_path' in args and args['demand_table_path'] != '':
demand_dict = utils.build_lookup_from_csv(
args['demand_table_path'], 'lucode')
demand_dict = utils.read_csv_to_dataframe(
args['demand_table_path'], 'lucode').to_dict(orient='index')
demand_reclassify_dict = dict(
[(lucode, demand_dict[lucode]['demand'])
for lucode in demand_dict])
4 changes: 2 additions & 2 deletions src/natcap/invest/carbon.py
@@ -366,8 +366,8 @@ def execute(args):
(_INTERMEDIATE_BASE_FILES, intermediate_output_dir),
(_TMP_BASE_FILES, output_dir)], file_suffix)

carbon_pool_table = utils.build_lookup_from_csv(
args['carbon_pools_path'], 'lucode')
carbon_pool_table = utils.read_csv_to_dataframe(
args['carbon_pools_path'], 'lucode').to_dict(orient='index')

work_token_dir = os.path.join(
intermediate_output_dir, '_taskgraph_working_dir')
15 changes: 8 additions & 7 deletions src/natcap/invest/coastal_blue_carbon/coastal_blue_carbon.py
@@ -584,8 +584,8 @@ def execute(args):

# We're assuming that the LULC initial variables and the carbon pool
# transient table are combined into a single lookup table.
biophysical_parameters = utils.build_lookup_from_csv(
args['biophysical_table_path'], 'code')
biophysical_parameters = utils.read_csv_to_dataframe(
args['biophysical_table_path'], 'code').to_dict(orient='index')

# LULC Classnames are critical to the transition mapping, so they must be
# unique. This check is here in ``execute`` because it's possible that
@@ -964,8 +964,8 @@ def execute(args):
if args.get('use_price_table', False):
prices = {
year: values['price'] for (year, values) in
utils.build_lookup_from_csv(
args['price_table_path'], 'year').items()}
utils.read_csv_to_dataframe(
args['price_table_path'], 'year'
).to_dict(orient='index').items()}
else:
inflation_rate = float(args['inflation_rate']) * 0.01
annual_price = float(args['price'])
@@ -1985,7 +1986,8 @@ def _read_transition_matrix(transition_csv_path, biophysical_dict):
landcover transition, and the second contains accumulation rates for
the pool for the landcover transition.
"""
table = utils.read_csv_to_dataframe(transition_csv_path, index_col=False)
table = utils.read_csv_to_dataframe(
transition_csv_path, cols_to_lower=False, vals_to_lower=False)

lulc_class_to_lucode = {}
max_lucode = 0
@@ -2239,8 +2241,7 @@ def _extract_snapshots_from_table(csv_path):

"""
table = utils.read_csv_to_dataframe(
csv_path, to_lower=True, index_col=False,
expand_path_cols=['raster_path'])
csv_path, vals_to_lower=False, expand_path_cols=['raster_path'])

output_dict = {}
table.set_index("snapshot_year", drop=False, inplace=True)
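
Two keyword-argument patterns recur in the hunks above and below: cols_to_lower and vals_to_lower keep column names or values in their original case, and expand_path_cols expands relative paths in the named columns. A short sketch of both, assuming placeholder file names ('transitions.csv' and 'snapshots.csv' are not from this PR; the keyword names are taken from the diff):

from natcap.invest import utils

# Preserve the original case of column names and values
# ('transitions.csv' is a placeholder file name).
transitions = utils.read_csv_to_dataframe(
    'transitions.csv', cols_to_lower=False, vals_to_lower=False)

# Expand relative paths in the 'raster_path' column to absolute paths
# ('snapshots.csv' is a placeholder file name).
snapshots = utils.read_csv_to_dataframe(
    'snapshots.csv', vals_to_lower=False,
    expand_path_cols=['raster_path'])
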
4 changes: 2 additions & 2 deletions src/natcap/invest/coastal_blue_carbon/preprocessor.py
@@ -209,8 +209,8 @@ def execute(args):
target_path_list=aligned_snapshot_paths,
task_name='Align input landcover rasters')

landcover_table = utils.build_lookup_from_csv(
args['lulc_lookup_table_path'], 'code')
landcover_table = utils.read_csv_to_dataframe(
args['lulc_lookup_table_path'], 'code').to_dict(orient='index')

target_transition_table = os.path.join(
output_dir, TRANSITION_TABLE.format(suffix=suffix))
8 changes: 5 additions & 3 deletions src/natcap/invest/coastal_vulnerability.py
@@ -2315,7 +2315,7 @@ def _schedule_habitat_tasks(

"""
habitat_dataframe = utils.read_csv_to_dataframe(
habitat_table_path, to_lower=True, expand_path_cols=['path'])
habitat_table_path, vals_to_lower=False, expand_path_cols=['path'])
habitat_dataframe = habitat_dataframe.rename(
columns={'protection distance (m)': 'distance'})

@@ -2834,7 +2834,8 @@ def assemble_results_and_calculate_exposure(
with open(pickle_path, 'rb') as file:
final_values_dict[var_name] = pickle.load(file)

habitat_df = utils.read_csv_to_dataframe(habitat_protection_path)
habitat_df = utils.read_csv_to_dataframe(
habitat_protection_path, cols_to_lower=False, vals_to_lower=False)
output_layer.StartTransaction()
for feature in output_layer:
shore_id = feature.GetField(SHORE_ID_FIELD)
@@ -3464,7 +3465,8 @@ def _validate_habitat_table_paths(habitat_table_path):
ValueError if any vector in the ``path`` column cannot be opened.
"""
habitat_dataframe = utils.read_csv_to_dataframe(
habitat_table_path, expand_path_cols=['path'])
habitat_table_path, cols_to_lower=False, vals_to_lower=False,
expand_path_cols=['path'])
bad_paths = []
for habitat_row in habitat_dataframe.itertuples():
try:
13 changes: 7 additions & 6 deletions src/natcap/invest/crop_production_percentile.py
@@ -458,8 +458,8 @@ def execute(args):
None.

"""
crop_to_landcover_table = utils.build_lookup_from_csv(
args['landcover_to_crop_table_path'], 'crop_name', to_lower=True)
crop_to_landcover_table = utils.read_csv_to_dataframe(
args['landcover_to_crop_table_path'], 'crop_name').to_dict(orient='index')
bad_crop_name_list = []
for crop_name in crop_to_landcover_table:
crop_climate_bin_raster_path = os.path.join(
@@ -540,8 +540,8 @@ def execute(args):
climate_percentile_yield_table_path = os.path.join(
args['model_data_path'],
_CLIMATE_PERCENTILE_TABLE_PATTERN % crop_name)
crop_climate_percentile_table = utils.build_lookup_from_csv(
climate_percentile_yield_table_path, 'climate_bin', to_lower=True)
crop_climate_percentile_table = utils.read_csv_to_dataframe(
climate_percentile_yield_table_path, 'climate_bin').to_dict(orient='index')
yield_percentile_headers = [
x for x in list(crop_climate_percentile_table.values())[0]
if x != 'climate_bin']
@@ -698,9 +698,10 @@ def execute(args):

# both 'crop_nutrient.csv' and 'crop' are known data/header values for
# this model data.
nutrient_table = utils.build_lookup_from_csv(
nutrient_table = utils.read_csv_to_dataframe(
os.path.join(args['model_data_path'], 'crop_nutrient.csv'),
'crop', to_lower=False)
'crop', cols_to_lower=False, vals_to_lower=False
).to_dict(orient='index')
result_table_path = os.path.join(
output_dir, 'result_table%s.csv' % file_suffix)

17 changes: 9 additions & 8 deletions src/natcap/invest/crop_production_regression.py
@@ -484,11 +484,11 @@ def execute(args):

LOGGER.info(
"Checking if the landcover raster is missing lucodes")
crop_to_landcover_table = utils.build_lookup_from_csv(
args['landcover_to_crop_table_path'], 'crop_name', to_lower=True)
crop_to_landcover_table = utils.read_csv_to_dataframe(
args['landcover_to_crop_table_path'], 'crop_name').to_dict(orient='index')

crop_to_fertlization_rate_table = utils.build_lookup_from_csv(
args['fertilization_rate_table_path'], 'crop_name', to_lower=True)
crop_to_fertlization_rate_table = utils.read_csv_to_dataframe(
args['fertilization_rate_table_path'], 'crop_name').to_dict(orient='index')

crop_lucodes = [
x[_EXPECTED_LUCODE_TABLE_HEADER]
@@ -571,8 +571,8 @@ def execute(args):
crop_regression_table_path = os.path.join(
args['model_data_path'], _REGRESSION_TABLE_PATTERN % crop_name)

crop_regression_table = utils.build_lookup_from_csv(
crop_regression_table_path, 'climate_bin', to_lower=True)
crop_regression_table = utils.read_csv_to_dataframe(
crop_regression_table_path, 'climate_bin').to_dict(orient='index')
for bin_id in crop_regression_table:
for header in _EXPECTED_REGRESSION_TABLE_HEADERS:
if crop_regression_table[bin_id][header.lower()] == '':
@@ -796,9 +796,10 @@ def execute(args):

# both 'crop_nutrient.csv' and 'crop' are known data/header values for
# this model data.
nutrient_table = utils.build_lookup_from_csv(
nutrient_table = utils.read_csv_to_dataframe(
os.path.join(args['model_data_path'], 'crop_nutrient.csv'),
'crop', to_lower=False)
'crop', cols_to_lower=False, vals_to_lower=False
).to_dict(orient='index')

LOGGER.info("Generating report table")
result_table_path = os.path.join(
2 changes: 1 addition & 1 deletion src/natcap/invest/datastack.py
@@ -336,7 +336,7 @@ def build_datastack_archive(args, model_name, datastack_path):
data_dir, f'{key}_csv_data')

dataframe = utils.read_csv_to_dataframe(
source_path, to_lower=True)
source_path, vals_to_lower=False)
csv_source_dir = os.path.abspath(os.path.dirname(source_path))
for spatial_column_name in spatial_columns:
# Iterate through the spatial columns, identify the set of
12 changes: 6 additions & 6 deletions src/natcap/invest/forest_carbon_edge_effect.py
@@ -418,8 +418,8 @@ def execute(args):
# Map non-forest landcover codes to carbon biomasses
LOGGER.info('Calculating direct mapped carbon stocks')
carbon_maps = []
biophysical_table = utils.build_lookup_from_csv(
args['biophysical_table_path'], 'lucode', to_lower=False)
biophysical_table = utils.read_csv_to_dataframe(
args['biophysical_table_path'], 'lucode').to_dict(orient='index')
biophysical_keys = [
x.lower() for x in list(biophysical_table.values())[0].keys()]
pool_list = [('c_above', True)]
@@ -630,8 +630,8 @@ def _calculate_lulc_carbon_map(

"""
# classify forest pixels from lulc
biophysical_table = utils.build_lookup_from_csv(
biophysical_table_path, 'lucode', to_lower=False)
biophysical_table = utils.read_csv_to_dataframe(
biophysical_table_path, 'lucode').to_dict(orient='index')

lucode_to_per_cell_carbon = {}
cell_size = pygeoprocessing.get_raster_info(
@@ -696,8 +696,8 @@ def _map_distance_from_tropical_forest_edge(

"""
# Build a list of forest lucodes
biophysical_table = utils.build_lookup_from_csv(
biophysical_table_path, 'lucode', to_lower=False)
biophysical_table = utils.read_csv_to_dataframe(
biophysical_table_path, 'lucode').to_dict(orient='index')
forest_codes = [
lucode for (lucode, ludata) in biophysical_table.items()
if int(ludata['is_tropical_forest']) == 1]
22 changes: 12 additions & 10 deletions src/natcap/invest/habitat_quality.py
@@ -380,11 +380,12 @@ def execute(args):
LOGGER.info("Checking Threat and Sensitivity tables for compliance")
# Get CSVs as dictionaries and ensure the key is a string for threats.
threat_dict = {
str(key): value for key, value in utils.build_lookup_from_csv(
args['threats_table_path'], 'THREAT', to_lower=True,
expand_path_cols=['cur_path', 'fut_path', 'base_path']).items()}
sensitivity_dict = utils.build_lookup_from_csv(
args['sensitivity_table_path'], 'LULC', to_lower=True)
str(key): value for key, value in utils.read_csv_to_dataframe(
args['threats_table_path'], 'THREAT',
expand_path_cols=['cur_path', 'fut_path', 'base_path']
).to_dict(orient='index').items()}
sensitivity_dict = utils.read_csv_to_dataframe(
args['sensitivity_table_path'], 'LULC').to_dict(orient='index')

half_saturation_constant = float(args['half_saturation_constant'])

@@ -1156,11 +1157,12 @@ def validate(args, limit_to=None):

# Get CSVs as dictionaries and ensure the key is a string for threats.
threat_dict = {
str(key): value for key, value in utils.build_lookup_from_csv(
args['threats_table_path'], 'THREAT', to_lower=True,
expand_path_cols=['cur_path', 'fut_path', 'base_path']).items()}
sensitivity_dict = utils.build_lookup_from_csv(
args['sensitivity_table_path'], 'LULC', to_lower=True)
str(key): value for key, value in utils.read_csv_to_dataframe(
args['threats_table_path'], 'THREAT',
expand_path_cols=['cur_path', 'fut_path', 'base_path']
).to_dict(orient='index').items()}
sensitivity_dict = utils.read_csv_to_dataframe(
args['sensitivity_table_path'], 'LULC').to_dict(orient='index')

# check that the threat names in the threats table match with the
# threats columns in the sensitivity table.
3 changes: 2 additions & 1 deletion src/natcap/invest/hra.py
@@ -1845,7 +1845,8 @@ def _open_table_as_dataframe(table_path, **kwargs):
return excel_df
else:
return utils.read_csv_to_dataframe(
table_path, to_lower=True, expand_path_cols=['path'], **kwargs)
table_path, vals_to_lower=False,
expand_path_cols=['path'], **kwargs)


def _parse_info_table(info_table_path):
4 changes: 2 additions & 2 deletions src/natcap/invest/ndr/ndr.py
@@ -619,8 +619,8 @@ def _validate_inputs(nutrients_to_process, lucode_to_parameters):
if args['calc_' + nutrient_id]:
nutrients_to_process.append(nutrient_id)

lucode_to_parameters = utils.build_lookup_from_csv(
args['biophysical_table_path'], 'lucode')
lucode_to_parameters = utils.read_csv_to_dataframe(
args['biophysical_table_path'], 'lucode').to_dict(orient='index')

_validate_inputs(nutrients_to_process, lucode_to_parameters)

8 changes: 4 additions & 4 deletions src/natcap/invest/pollination.py
@@ -1179,8 +1179,8 @@ def _parse_scenario_variables(args):
else:
farm_vector_path = None

guild_table = utils.build_lookup_from_csv(
guild_table_path, 'species', to_lower=True)
guild_table = utils.read_csv_to_dataframe(
guild_table_path, 'species').to_dict(orient='index')

LOGGER.info('Checking to make sure guild table has all expected headers')
guild_headers = list(guild_table.values())[0].keys()
@@ -1192,8 +1192,8 @@
f"'{header}' but was unable to find one. Here are all the "
f"headers from {guild_table_path}: {', '.join(guild_headers)}")

landcover_biophysical_table = utils.build_lookup_from_csv(
landcover_biophysical_table_path, 'lucode', to_lower=True)
landcover_biophysical_table = utils.read_csv_to_dataframe(
landcover_biophysical_table_path, 'lucode').to_dict(orient='index')
biophysical_table_headers = (
list(landcover_biophysical_table.values())[0].keys())
for header in _EXPECTED_BIOPHYSICAL_HEADERS:
20 changes: 11 additions & 9 deletions src/natcap/invest/recreation/recmodel_client.py
@@ -853,8 +853,9 @@ def _schedule_predictor_data_processing(
'line_intersect_length': _line_intersect_length,
}

predictor_table = utils.build_lookup_from_csv(
predictor_table_path, 'id', expand_path_cols=['path'])
predictor_table = utils.read_csv_to_dataframe(
predictor_table_path, 'id', expand_path_cols=['path']
).to_dict(orient='index')
predictor_task_list = []
predictor_json_list = [] # tracks predictor files to add to shp

@@ -1546,7 +1547,8 @@ def _validate_same_id_lengths(table_path):
tables.

"""
predictor_table = utils.build_lookup_from_csv(table_path, 'id')
predictor_table = utils.read_csv_to_dataframe(
table_path, 'id').to_dict(orient='index')
too_long = set()
for p_id in predictor_table:
if len(p_id) > 10:
@@ -1579,11 +1581,11 @@ def _validate_same_ids_and_types(
tables.

"""
predictor_table = utils.build_lookup_from_csv(
predictor_table_path, 'id')
predictor_table = utils.read_csv_to_dataframe(
predictor_table_path, 'id').to_dict(orient='index')

scenario_predictor_table = utils.build_lookup_from_csv(
scenario_predictor_table_path, 'id')
scenario_predictor_table = utils.read_csv_to_dataframe(
scenario_predictor_table_path, 'id').to_dict(orient='index')

predictor_table_pairs = set([
(p_id, predictor_table[p_id]['type'].strip()) for p_id in predictor_table])
@@ -1616,7 +1618,7 @@ def _validate_same_projection(base_vector_path, table_path):
# This will load the table as a list of paths which we can iterate through
# without bothering the rest of the table structure
data_paths = utils.read_csv_to_dataframe(
table_path, to_lower=True, expand_path_cols=['path']
table_path, vals_to_lower=False, expand_path_cols=['path']
).squeeze('columns')['path'].tolist()

base_vector = gdal.OpenEx(base_vector_path, gdal.OF_VECTOR)
@@ -1673,7 +1675,7 @@ def _validate_predictor_types(table_path):
ValueError if any value in the ``type`` column does not match a valid
type, ignoring leading/trailing whitespace.
"""
df = utils.read_csv_to_dataframe(table_path, to_lower=True)
df = utils.read_csv_to_dataframe(table_path, vals_to_lower=False)
# ignore leading/trailing whitespace because it will be removed
# when the type values are used
type_list = set([type.strip() for type in df['type']])