Skip to content

Commit

Permalink
Remove DataFrames completely (#1035)
Browse files Browse the repository at this point in the history
  • Loading branch information
abelsiqueira authored Feb 26, 2025
1 parent 68c3bcc commit e8e59d5
Show file tree
Hide file tree
Showing 18 changed files with 85 additions and 81 deletions.
6 changes: 2 additions & 4 deletions Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@ version = "0.12.0"

[deps]
CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b"
DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
DuckDB = "d2f5444f-75bc-4fdf-ac35-56f514c445e1"
DuckDB_jll = "2cbbab25-fc8b-58cf-88d4-687a02676033"
HiGHS = "87dc4568-4c63-4d18-b0c0-bb2238e4078b"
Expand All @@ -21,9 +20,8 @@ TulipaIO = "7b3808b7-0819-42d4-885c-978ba173db11"

[compat]
CSV = "0.10"
DataFrames = "1"
DuckDB = "0.10, ~1.0" # ~1.0 until they fix https://github.com/duckdb/duckdb/issues/13911
DuckDB_jll = "0.10, ~1.0" # DuckDB 1.0.0 still allows DuckDB_jll 1.1.0
DuckDB = "0.10, ~1.0"
DuckDB_jll = "0.10, ~1.0"
HiGHS = "1"
JuMP = "1"
MathOptInterface = "1"
Expand Down
1 change: 0 additions & 1 deletion src/TulipaEnergyModel.jl
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@ module TulipaEnergyModel

## Data
using CSV: CSV
using DataFrames: DataFrames, DataFrame
using DuckDB: DuckDB, DBInterface
using TOML: TOML
using TulipaIO: TulipaIO
Expand Down
8 changes: 4 additions & 4 deletions src/constraints/capacity.jl
Original file line number Diff line number Diff line change
Expand Up @@ -248,7 +248,7 @@ function add_capacity_constraints!(connection, model, expressions, constraints,
outgoing_flow profile_times_capacity,
base_name = "$cons_name[$(row.asset),$(row.year),$(row.rep_period),$(row.time_block_start):$(row.time_block_end)]"
) for (row, outgoing_flow, profile_times_capacity) in zip(
eachrow(constraints[table_name].indices),
constraints[table_name].indices,
constraints[table_name].expressions[:outgoing],
constraints[table_name].expressions[:profile_times_capacity],
)
Expand All @@ -271,7 +271,7 @@ function add_capacity_constraints!(connection, model, expressions, constraints,
outgoing_flow profile_times_capacity,
base_name = "$cons_name[$(row.asset),$(row.year),$(row.rep_period),$(row.time_block_start):$(row.time_block_end)]"
) for (row, outgoing_flow, profile_times_capacity) in zip(
eachrow(constraints[table_name].indices),
constraints[table_name].indices,
constraints[table_name].expressions[:outgoing],
constraints[table_name].expressions[Symbol("profile_times_capacity$suffix")],
)
Expand All @@ -294,7 +294,7 @@ function add_capacity_constraints!(connection, model, expressions, constraints,
incoming_flow profile_times_capacity,
base_name = "$cons_name[$(row.asset),$(row.year),$(row.rep_period),$(row.time_block_start):$(row.time_block_end)]"
) for (row, incoming_flow, profile_times_capacity) in zip(
eachrow(constraints[table_name].indices),
constraints[table_name].indices,
constraints[table_name].expressions[:incoming],
constraints[table_name].expressions[:profile_times_capacity],
)
Expand All @@ -317,7 +317,7 @@ function add_capacity_constraints!(connection, model, expressions, constraints,
incoming_flow profile_times_capacity,
base_name = "$cons_name[$(row.asset),$(row.year),$(row.rep_period),$(row.time_block_start):$(row.time_block_end)]"
) for (row, incoming_flow, profile_times_capacity) in zip(
eachrow(constraints[table_name].indices),
constraints[table_name].indices,
constraints[table_name].expressions[:incoming],
constraints[table_name].expressions[Symbol("profile_times_capacity$suffix")],
)
Expand Down
9 changes: 2 additions & 7 deletions src/constraints/conversion.jl
Original file line number Diff line number Diff line change
@@ -1,12 +1,7 @@
export add_conversion_constraints!

"""
add_conversion_constraints!(model,
dataframes,
Acv,
incoming_flow_lowest_resolution,
outgoing_flow_lowest_resolution,
)
add_conversion_constraints!(model, constraints)
Adds the conversion asset constraints to the model.
"""
Expand All @@ -23,7 +18,7 @@ function add_conversion_constraints!(model, constraints)
incoming_flow == outgoing_flow,
base_name = "conversion_balance[$(row.asset),$(row.year),$(row.rep_period),$(row.time_block_start):$(row.time_block_end)]"
) for (row, incoming_flow, outgoing_flow) in
zip(eachrow(cons.indices), cons.expressions[:incoming], cons.expressions[:outgoing])
zip(cons.indices, cons.expressions[:incoming], cons.expressions[:outgoing])
],
)
end
Expand Down
10 changes: 5 additions & 5 deletions src/constraints/group.jl
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ function add_group_constraints!(connection, model, variables, constraints)
asset_row.capacity * assets_investment[asset_row.index] for
asset_row in _get_assets_in_group(connection, row.name)
)
) for row in eachrow(cons.indices)
) for row in cons.indices
],
)
end
Expand All @@ -35,8 +35,8 @@ function add_group_constraints!(connection, model, variables, constraints)
model,
investment_group row.max_investment_limit,
base_name = "investment_group_max_limit[$(row.name)]"
) for (row, investment_group) in
zip(eachrow(cons.indices), cons.expressions[:investment_group])
) for
(row, investment_group) in zip(cons.indices, cons.expressions[:investment_group])
],
)
end
Expand All @@ -51,8 +51,8 @@ function add_group_constraints!(connection, model, variables, constraints)
model,
investment_group row.min_investment_limit,
base_name = "investment_group_min_limit[$(row.name)]"
) for (row, investment_group) in
zip(eachrow(cons.indices), cons.expressions[:investment_group])
) for
(row, investment_group) in zip(cons.indices, cons.expressions[:investment_group])
],
)
end
Expand Down
2 changes: 1 addition & 1 deletion src/constraints/hub.jl
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ function add_hub_constraints!(model, constraints)
incoming_flow == outgoing_flow,
base_name = "$table_name[$(row.asset),$(row.year),$(row.rep_period),$(row.time_block_start):$(row.time_block_end)]"
) for (row, incoming_flow, outgoing_flow) in
zip(eachrow(cons.indices), cons.expressions[:incoming], cons.expressions[:outgoing])
zip(cons.indices, cons.expressions[:incoming], cons.expressions[:outgoing])
],
)
end
Expand Down
4 changes: 2 additions & 2 deletions src/constraints/ramping-and-unit-commitment.jl
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,7 @@ function add_ramping_constraints!(connection, model, variables, expressions, con
indices = indices_dict[table_name]
## Ramping Constraints with unit commitment
# Note: We start ramping constraints from the second timesteps_block
# We filter and group the dataframe per asset and representative period
# We filter and group the indices per asset and representative period
# use the units_on column to more easily get the index - 1, i.e., the previous one
units_on = cons.expressions[:units_on]

Expand Down Expand Up @@ -191,7 +191,7 @@ function add_ramping_constraints!(connection, model, variables, expressions, con
indices = indices_dict[table_name]
## Ramping Constraints without unit commitment
# Note: We start ramping constraints from the second timesteps_block
# We filter and group the dataframe per asset and representative period that does not have the unit_commitment methods
# We filter and group the indices per asset and representative period that does not have the unit_commitment methods

# - Maximum ramp-up rate limit to the flow (no unit commitment variables)
attach_constraint!(
Expand Down
46 changes: 22 additions & 24 deletions src/constraints/storage.jl
Original file line number Diff line number Diff line change
Expand Up @@ -44,19 +44,17 @@ function add_storage_constraints!(connection, model, variables, expressions, con
previous_level::JuMP.VariableRef = if row.time_block_start > 1
var_storage_level[row.index-1]
else
# Find last index of this group
# TODO: Replace by DuckDB call when working on #955
last_index = last(
DataFrames.subset(
cons.indices,
[:asset, :year, :rep_period] =>
(a, y, rp) ->
a .== row.asset .&&
y .== row.year .&&
rp .== row.rep_period;
view = true,
).index,
)
# Find last index of this group (there are probably cheaper ways, in case this becomes expensive)
last_index = only([
row[1] for row in DuckDB.query(
connection,
"SELECT
MAX(index)
FROM cons_$table_name
WHERE asset = '$(row.asset)' AND year = $(row.year) AND rep_period = $(row.rep_period)
",
)
])::Int
var_storage_level[last_index]
end
@constraint(
Expand Down Expand Up @@ -135,8 +133,7 @@ function add_storage_constraints!(connection, model, variables, expressions, con
var_storage_level = variables[:storage_level_over_clustered_year].container
indices = _append_storage_data_to_indices(connection, table_name)

# This assumes an ordering of the time blocks, that is guaranteed inside
# construct_dataframes
# This assumes an ordering of the time blocks, that is guaranteed by the append function above
# The storage_inflows have been moved here
attach_constraint!(
model,
Expand All @@ -159,15 +156,16 @@ function add_storage_constraints!(connection, model, variables, expressions, con
previous_level::JuMP.VariableRef = if row.period_block_start > 1
var_storage_level[row.index-1]
else
# TODO: Replace by DuckDB call when working on #955
last_index = last(
DataFrames.subset(
cons.indices,
[:asset, :year] =>
(a, y) -> a .== row.asset .&& y .== row.year;
view = true,
).index,
)
last_index = only([
row[1] for row in DuckDB.query(
connection,
"SELECT
MAX(index)
FROM cons_$table_name
WHERE asset = '$(row.asset)' AND year = $(row.year)
",
)
])::Int
var_storage_level[last_index]
end

Expand Down
1 change: 0 additions & 1 deletion src/create-model.jl
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,6 @@ function create_model(
@timeit to "add_unit_commitment_variables!" add_unit_commitment_variables!(model, variables)
@timeit to "add_storage_variables!" add_storage_variables!(connection, model, variables)

## Add expressions to dataframes
@timeit to "add_expressions_to_constraints!" add_expressions_to_constraints!(
connection,
variables,
Expand Down
2 changes: 1 addition & 1 deletion src/expressions/intersection.jl
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ function attach_expression_on_constraints_grouping_variables!(
[:index, :time_block_start, :time_block_end],
)

num_rows = size(cons.indices, 1)
num_rows = get_num_rows(connection, cons)
attach_expression!(cons, expr_name, Vector{JuMP.AffExpr}(undef, num_rows))
cons.expressions[expr_name] .= JuMP.AffExpr(0.0)

Expand Down
8 changes: 4 additions & 4 deletions src/input-schemas.jl
Original file line number Diff line number Diff line change
Expand Up @@ -68,8 +68,8 @@ const schemas = (
profiles_reference = OrderedDict(
:asset => "VARCHAR", # Asset name
:commission_year => "INTEGER",
:profile_type => "VARCHAR", # Type of profile, used to determine dataframe with source profile
:profile_name => "VARCHAR", # Name of profile, used to determine data inside the dataframe
:profile_type => "VARCHAR", # Type of profile, used to determine source profile
:profile_name => "VARCHAR", # Name of profile, used to match with the profiles_data
),

# Schema for the assets-timeframe-partitions.csv file.
Expand Down Expand Up @@ -149,8 +149,8 @@ const schemas = (
:from_asset => "VARCHAR", # Name of Asset
:to_asset => "VARCHAR", # Name of Asset
:year => "INTEGER",
:profile_type => "VARCHAR", # Type of profile, used to determine dataframe with source profile
:profile_name => "VARCHAR", # Name of profile, used to determine data inside the dataframe
:profile_type => "VARCHAR", # Type of profile, used to determine source profile
:profile_name => "VARCHAR", # Name of profile, used to match with the profiles_data
),

# Schema for the flows-rep-periods-partitions.csv file.
Expand Down
4 changes: 2 additions & 2 deletions src/model-preparation.jl
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ function add_expression_terms_rep_period_constraints!(
selected_assets = ["hub", "consumer", "producer"],
),
]
num_rows = size(cons.indices, 1)
num_rows = get_num_rows(connection, cons)

# The SQL strategy to improve looping over the groups and then the
# constraints and variables, is to create grouped tables beforehand and join them
Expand Down Expand Up @@ -233,7 +233,7 @@ function add_expression_terms_over_clustered_year_constraints!(
workspace;
is_storage_level = false,
)
num_rows = size(cons.indices, 1)
num_rows = get_num_rows(connection, cons)

cases = [(expr_key = :outgoing, asset_match = :from)]
if is_storage_level
Expand Down
51 changes: 34 additions & 17 deletions src/structures.jl
Original file line number Diff line number Diff line change
Expand Up @@ -15,17 +15,38 @@ const PeriodsBlock = UnitRange{Int}
const PeriodType = Symbol
const PERIOD_TYPES = [:rep_periods, :timeframe]

"""
    TulipaTabularIndex

Abstract supertype for `TulipaVariable`, `TulipaConstraint` and `TulipaExpression`.

All subtypes must have the fields:

- `indices::DuckDB.QueryResult`
- `table_name::String`
"""
abstract type TulipaTabularIndex end

"""
    get_num_rows(connection, table_name)

Return the number of rows of table `table_name`, counted through `connection`.
"""
function get_num_rows(connection, table_name::Union{String,Symbol})
    # COUNT(*) yields exactly one row with one column; `only` enforces that
    count_result = DuckDB.query(connection, "SELECT COUNT(*) FROM $table_name")
    return only([row[1] for row in count_result])
end

"""
    get_num_rows(connection, object)

Return the number of rows of the table backing `object`, i.e., of `object.table_name`.
"""
function get_num_rows(connection, object::TulipaTabularIndex)
    # Delegate to the String/Symbol method using the stored table name
    return get_num_rows(connection, object.table_name)
end

"""
Structure to hold the JuMP variables for the TulipaEnergyModel
"""
mutable struct TulipaVariable
indices::DataFrame
mutable struct TulipaVariable <: TulipaTabularIndex
indices::DuckDB.QueryResult
table_name::String
container::Vector{JuMP.VariableRef}

function TulipaVariable(connection, table_name::String)
return new(
DuckDB.query(connection, "SELECT * FROM $table_name") |> DataFrame,
DuckDB.query(connection, "SELECT * FROM $table_name"),
table_name,
JuMP.VariableRef[],
)
Expand All @@ -35,8 +56,8 @@ end
"""
Structure to hold the JuMP constraints for the TulipaEnergyModel
"""
mutable struct TulipaConstraint
indices::DataFrame
mutable struct TulipaConstraint <: TulipaTabularIndex
indices::DuckDB.QueryResult
table_name::String
num_rows::Int
constraint_names::Vector{Symbol}
Expand All @@ -46,7 +67,7 @@ mutable struct TulipaConstraint

function TulipaConstraint(connection, table_name::String)
return new(
DuckDB.query(connection, "SELECT * FROM $table_name") |> DataFrame,
DuckDB.query(connection, "SELECT * FROM $table_name"),
table_name,
only([ # only makes sure that a single value is returned
row.num_rows for
Expand All @@ -63,15 +84,15 @@ end
"""
Structure to hold some JuMP expressions that are not attached to constraints but are attached to a table.
"""
mutable struct TulipaExpression
indices::DataFrame
mutable struct TulipaExpression <: TulipaTabularIndex
indices::DuckDB.QueryResult
table_name::String
num_rows::Int
expressions::Dict{Symbol,Vector{JuMP.AffExpr}}

function TulipaExpression(connection, table_name::String)
return new(
DuckDB.query(connection, "SELECT * FROM $table_name") |> DataFrame,
DuckDB.query(connection, "SELECT * FROM $table_name"),
table_name,
only([
row.num_rows for
Expand Down Expand Up @@ -107,7 +128,7 @@ end

function attach_constraint!(model::JuMP.Model, cons::TulipaConstraint, name::Symbol, container)
# This should be the empty case container = Any[] that happens when the
# indices table is empty in [@constraint(...) for row in indices].
# indices table in empty in [@constraint(...) for row in indices].
# It resolves to [] so the element type cannot be inferred
if length(container) > 0
error(
Expand All @@ -130,7 +151,7 @@ Attach a expression named `name` stored in `container`, and optionally set `mode
This checks that the `container` length matches the stored `indices` number of rows.
"""
function attach_expression!(
cons_or_expr::Union{TulipaVariable,TulipaConstraint,TulipaExpression},
cons_or_expr::TulipaTabularIndex,
name::Symbol,
container::Vector{JuMP.AffExpr},
)
Expand All @@ -141,13 +162,9 @@ function attach_expression!(
return nothing
end

function attach_expression!(
cons_or_expr::Union{TulipaVariable,TulipaConstraint,TulipaExpression},
name::Symbol,
container,
)
function attach_expression!(cons_or_expr::TulipaTabularIndex, name::Symbol, container)
# This should be the empty case container = Any[] that happens when the
# indices table is empty in [@constraint(...) for row in indices].
# indices table in empty in [@constraint(...) for row in indices].
# It resolves to [] so the element type cannot be inferred
if length(container) > 0
error(
Expand Down
2 changes: 1 addition & 1 deletion src/variables/flows.jl
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ export add_flow_variables!
add_flow_variables!(connection, model, variables)
Adds flow variables to the optimization `model` based on data from the `variables`.
The flow variables are created using the `@variable` macro for each row in the `:flows` dataframe.
The flow variables are created using the `@variable` macro for each row in the `:flows` table.
"""
function add_flow_variables!(connection, model, variables)
Expand Down
Loading

0 comments on commit e8e59d5

Please sign in to comment.