Skip to content

Commit

Permalink
Refactor storage and access of results
Browse files Browse the repository at this point in the history
This commit changes storage and access of results in order to improve
performance. DataFrames are replaced by DenseAxisArrays.

It also optimizes the generation of realized results.
  • Loading branch information
daniel-thom committed Aug 7, 2023
1 parent edd4a80 commit 21f6dff
Show file tree
Hide file tree
Showing 28 changed files with 825 additions and 871 deletions.
11 changes: 10 additions & 1 deletion src/PowerSimulations.jl
Original file line number Diff line number Diff line change
Expand Up @@ -452,6 +452,7 @@ include("core/cache_utils.jl")
include("core/optimizer_stats.jl")
include("core/dataset.jl")
include("core/dataset_container.jl")
include("core/results_by_time.jl")

include("core/optimization_container.jl")
include("core/store_common.jl")
Expand Down Expand Up @@ -496,9 +497,9 @@ include("simulation/simulation_store_params.jl")
include("simulation/hdf_simulation_store.jl")
include("simulation/in_memory_simulation_store.jl")
include("simulation/simulation_problem_results.jl")
include("simulation/realized_meta.jl")
include("simulation/decision_model_simulation_results.jl")
include("simulation/emulation_model_simulation_results.jl")
include("simulation/realized_meta.jl")
include("simulation/simulation_partitions.jl")
include("simulation/simulation_partition_results.jl")
include("simulation/simulation_sequence.jl")
Expand Down Expand Up @@ -575,4 +576,12 @@ include("utils/recorder_events.jl")
include("utils/datetime_utils.jl")
include("utils/generate_valid_formulations.jl")

# TODO: These exist for backward compatibility and need to be deprecated and removed.
# Each legacy reader below forwards all of its positional and keyword arguments,
# unchanged, to read_results_with_keys.
function read_aux_variables_with_keys(args...; kwargs...)
    return read_results_with_keys(args...; kwargs...)
end

function read_duals_with_keys(args...; kwargs...)
    return read_results_with_keys(args...; kwargs...)
end

function read_expressions_with_keys(args...; kwargs...)
    return read_results_with_keys(args...; kwargs...)
end

function read_parameters_with_keys(args...; kwargs...)
    return read_results_with_keys(args...; kwargs...)
end

function read_variables_with_keys(args...; kwargs...)
    return read_results_with_keys(args...; kwargs...)
end

end
81 changes: 52 additions & 29 deletions src/core/dataset.jl
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
abstract type AbstractDataset end

Base.length(s::AbstractDataset) = size(s.values)[1]
get_data_resolution(s::AbstractDataset)::Dates.Millisecond = s.resolution
get_last_recorded_row(s::AbstractDataset) = s.last_recorded_row

Expand All @@ -21,8 +20,9 @@ end

# Values field is accessed with dot syntax to avoid type instability

mutable struct DataFrameDataset <: AbstractDataset
values::DataFrames.DataFrame
mutable struct InMemoryDataset <: AbstractDataset
"Data with dimensions (column names, row indexes)"
values::DenseAxisArray{Float64, 2}
# We use Array here to allow for overwrites when updating the state
timestamps::Vector{Dates.DateTime}
# Resolution is needed because AbstractDataset might have just one row
Expand All @@ -32,13 +32,13 @@ mutable struct DataFrameDataset <: AbstractDataset
update_timestamp::Dates.DateTime
end

function DataFrameDataset(
values::DataFrames.DataFrame,
function InMemoryDataset(
values::DenseAxisArray{Float64, 2},
timestamps::Vector{Dates.DateTime},
resolution::Dates.Millisecond,
end_of_step_index::Int,
)
return DataFrameDataset(
return InMemoryDataset(
values,
timestamps,
resolution,
Expand All @@ -48,8 +48,8 @@ function DataFrameDataset(
)
end

function DataFrameDataset(values::DataFrames.DataFrame)
return DataFrameDataset(
function InMemoryDataset(values::DenseAxisArray{Float64, 2})
return InMemoryDataset(
values,
Vector{Dates.DateTime}(),
Dates.Second(0.0),
Expand All @@ -59,64 +59,64 @@ function DataFrameDataset(values::DataFrames.DataFrame)
)
end

# The length of an InMemoryDataset is the extent of the second dimension of `values`.
function Base.length(s::InMemoryDataset)
    (_, n_rows) = size(s.values)
    return n_rows
end

function make_system_state(
values::DataFrames.DataFrame,
values::DenseAxisArray{Float64, 2},
timestamp::Dates.DateTime,
resolution::Dates.Millisecond,
)
return DataFrameDataset(values, [timestamp], resolution, 0, 1, UNSET_INI_TIME)
return InMemoryDataset(values, [timestamp], resolution, 0, 1, UNSET_INI_TIME)
end

function get_dataset_value(s::DataFrameDataset, date::Dates.DateTime)
function get_dataset_value(s::InMemoryDataset, date::Dates.DateTime)
s_index = find_timestamp_index(s.timestamps, date)
if isnothing(s_index)
error("Request time stamp $date not in the state")
end
return s.values[s_index, :]
return s.values[:, s_index]
end

function get_column_names(::OptimizationContainerKey, s::DataFrameDataset)
return DataFrames.names(s.values)
end
# The column names are the first axis of the `values` array.
function get_column_names(s::InMemoryDataset)
    return first(axes(s.values))
end

# The key argument is not used; this method only forwards to the single-argument form.
function get_column_names(::OptimizationContainerKey, s::InMemoryDataset)
    return get_column_names(s)
end

function get_last_recorded_value(s::DataFrameDataset)
function get_last_recorded_value(s::InMemoryDataset)
if get_last_recorded_row(s) == 0
error("The Dataset hasn't been written yet")
end
return s.values[get_last_recorded_row(s), :]
return s.values[:, get_last_recorded_row(s)]
end

function get_end_of_step_timestamp(s::DataFrameDataset)
function get_end_of_step_timestamp(s::InMemoryDataset)
return s.timestamps[s.end_of_step_index]
end

"""
Return the timestamp from the most recent data row updated in the dataset. This value may not be the same as the result from `get_update_timestamp`.
"""
function get_last_updated_timestamp(s::DataFrameDataset)
function get_last_updated_timestamp(s::InMemoryDataset)
last_recorded_row = get_last_recorded_row(s)
if last_recorded_row == 0
return UNSET_INI_TIME
end
return s.timestamps[last_recorded_row]
end

function get_value_timestamp(s::DataFrameDataset, date::Dates.DateTime)
function get_value_timestamp(s::InMemoryDataset, date::Dates.DateTime)
s_index = find_timestamp_index(s.timestamps, date)
if isnothing(s_index)
error("Request time stamp $date not in the state")
end
return s.timestamps[s_index]
end

function set_value!(s::DataFrameDataset, vals, index::Int)
setindex!(s.values, vals, index, :)
function set_value!(s::InMemoryDataset, vals::DenseAxisArray{Float64, 2}, index::Int)
s.values[:, index] = vals[:, index]
return
end

function set_value!(s::DataFrameDataset, vals::DataFrames.DataFrame, index::Int)
@assert_op size(vals)[1] == 1
set_value!(s, vals[1, :], index)
function set_value!(s::InMemoryDataset, vals::DenseAxisArray{Float64, 1}, index::Int)
s.values[:, index] = vals
return
end

Expand All @@ -129,14 +129,35 @@ mutable struct HDF5Dataset <: AbstractDataset
resolution::Dates.Millisecond
initial_timestamp::Dates.DateTime
update_timestamp::Dates.DateTime
column_names::Vector{String}

function HDF5Dataset(values, column_dataset, write_index, last_recorded_row, resolution,
initial_timestamp,
update_timestamp, column_names,
)
new(values, column_dataset, write_index, last_recorded_row, resolution,
initial_timestamp,
update_timestamp, column_names)
end
end

# TODO DT: define the length of an HDF5Dataset. A candidate was `size(s.values)[1]`,
# but it is unclear what the answer should be in the 3-dim case.
# Not getting called by tests; raise a descriptive error so that any caller that does
# reach this method gets an actionable message.
function Base.length(s::HDF5Dataset)
    return error("Base.length is not implemented for HDF5Dataset")
end

HDF5Dataset(values, column_dataset, resolution, initial_time) =
HDF5Dataset(values, column_dataset, 1, 0, resolution, initial_time, UNSET_INI_TIME)
HDF5Dataset(
values,
column_dataset,
1,
0,
resolution,
initial_time,
UNSET_INI_TIME,
column_dataset[:],
)

function get_column_names(::OptimizationContainerKey, s::HDF5Dataset)
return s.column_dataset[:]
end
# Returns the dataset's `column_names` field; the key argument is not used.
function get_column_names(::OptimizationContainerKey, s::HDF5Dataset)
    return s.column_names
end

"""
Return the timestamp from the most recent data row updated in the dataset. This value may not be the same as the result from `get_update_timestamp`.
Expand All @@ -150,6 +171,8 @@ function get_last_updated_timestamp(s::HDF5Dataset)
end

function get_value_timestamp(s::HDF5Dataset, date::Dates.DateTime)
# TODO: This code is broken because timestamps is not a field.
# The function is called for InMemoryDataset but not HDF5Dataset.
s_index = find_timestamp_index(s.timestamps, date)
if isnothing(s_index)
error("Request time stamp $date not in the state")
Expand Down
11 changes: 5 additions & 6 deletions src/core/dataset_container.jl
Original file line number Diff line number Diff line change
Expand Up @@ -27,23 +27,23 @@ function Base.empty!(container::DatasetContainer)
return
end

function get_duals_values(container::DatasetContainer{DataFrameDataset})
function get_duals_values(container::DatasetContainer{InMemoryDataset})
return container.duals
end

function get_aux_variables_values(container::DatasetContainer{DataFrameDataset})
function get_aux_variables_values(container::DatasetContainer{InMemoryDataset})
return container.aux_variables
end

function get_variables_values(container::DatasetContainer{DataFrameDataset})
function get_variables_values(container::DatasetContainer{InMemoryDataset})
return container.variables
end

function get_parameters_values(container::DatasetContainer{DataFrameDataset})
function get_parameters_values(container::DatasetContainer{InMemoryDataset})
return container.parameters
end

function get_expression_values(container::DatasetContainer{DataFrameDataset})
function get_expression_values(container::DatasetContainer{InMemoryDataset})
return container.expressions
end

Expand Down Expand Up @@ -133,7 +133,6 @@ function get_dataset(
return get_dataset(container, ExpressionKey(T, U))
end

# Get dataset values is currently type unstable since the values field could be a DF
# Looks up the dataset stored in `container` under `key` and returns its `values` field.
function get_dataset_values(container::DatasetContainer, key::OptimizationContainerKey)
    dataset = get_dataset(container, key)
    return dataset.values
end
Expand Down
12 changes: 6 additions & 6 deletions src/core/optimization_container.jl
Original file line number Diff line number Diff line change
Expand Up @@ -939,7 +939,7 @@ function get_constraint(
end

function read_duals(container::OptimizationContainer)
return Dict(k => axis_array_to_dataframe(v, k) for (k, v) in get_duals(container))
return Dict(k => to_dataframe(jump_value.(v), k) for (k, v) in get_duals(container))
end

##################################### Parameter Container ##################################
Expand Down Expand Up @@ -1203,9 +1203,9 @@ function read_parameters(container::OptimizationContainer)
for (k, v) in parameters
# TODO: all functions similar to calculate_parameter_values should be in one
# place and be consistent in behavior.
#params_dict[k] = axis_array_to_dataframe(calculate_parameter_values(v))
param_array = axis_array_to_dataframe(get_parameter_values(v), k)
multiplier_array = axis_array_to_dataframe(get_multiplier_array(v), k)
#params_dict[k] = to_dataframe(calculate_parameter_values(v))
param_array = to_dataframe(get_parameter_values(v), k)
multiplier_array = to_dataframe(get_multiplier_array(v), k)
params_dict[k] = _calculate_parameter_values(k, param_array, multiplier_array)
end
return params_dict
Expand Down Expand Up @@ -1322,7 +1322,7 @@ end

function read_expressions(container::OptimizationContainer)
return Dict(
k => axis_array_to_dataframe(v, k) for (k, v) in get_expressions(container) if
k => to_dataframe(jump_value.(v), k) for (k, v) in get_expressions(container) if
!(get_entry_type(k) <: SystemBalanceExpressions)
)
end
Expand Down Expand Up @@ -1395,7 +1395,7 @@ function write_initial_conditions_data!(
if field == STORE_CONTAINER_PARAMETERS
ic_data_dict[key] = ic_container_dict[key]
else
ic_data_dict[key] = axis_array_to_dataframe(field_container, key)
ic_data_dict[key] = to_dataframe(jump_value.(field_container), key)
end
end
end
Expand Down
84 changes: 84 additions & 0 deletions src/core/results_by_time.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
"""
    ResultsByTime{T}

Results for a single optimization container key, stored per timestamp.

# Fields
- `key`: the container key the results belong to.
- `data`: one result of type `T` per timestamp, sorted by timestamp.
- `resolution`: time step between consecutive rows of one result; used as the step when
  building the DateTime column in `_add_timestamps!`.
- `column_names`: names of the result columns.

# Throws
Construction calls `_check_column_consistency(data, column_names)`, which raises an error
when a stored result does not match `column_names`. A `MethodError` is raised if no
`_check_column_consistency` method exists for the given `T`.
"""
struct ResultsByTime{T}
    key::OptimizationContainerKey
    data::SortedDict{Dates.DateTime, T}
    resolution::Dates.Period
    column_names::Vector{String}

    # The check lives in the inner constructor so that it runs on every construction
    # path. An untyped outer constructor is less specific than the auto-generated typed
    # one: well-typed calls skip it, and any call that reaches it recurses into itself.
    function ResultsByTime{T}(key, data, resolution, column_names) where {T}
        results = new{T}(key, data, resolution, column_names)
        # Validate the converted fields so dispatch sees the concrete stored types.
        _check_column_consistency(results.data, results.column_names)
        return results
    end
end

# Infers `T` from the element type of `data` and forwards to the checked constructor.
function ResultsByTime(
    key,
    data::SortedDict{Dates.DateTime, T},
    resolution,
    column_names,
) where {T}
    return ResultsByTime{T}(key, data, resolution, column_names)
end

# Verifies that the first axis of every DenseAxisArray in `data` equals `cols`; raises an
# error for the first array that does not.
function _check_column_consistency(
    data::SortedDict{Dates.DateTime, DenseAxisArray{Float64, 2}},
    cols,
)
    for array in values(data)
        array_cols = axes(array)[1]
        array_cols == cols ||
            error("Mismatch in DenseAxisArray column names: $array_cols $cols")
    end
end

# Verifies that every Matrix in `data` has as many columns (second dimension) as there
# are entries in `cols`; raises an error for the first matrix that does not.
function _check_column_consistency(data::SortedDict{Dates.DateTime, Matrix{Float64}}, cols)
    for matrix in values(data)
        num_matrix_cols = size(matrix, 2)
        num_names = length(cols)
        num_matrix_cols == num_names ||
            error("Mismatch in length of Matrix columns: $num_matrix_cols $num_names")
    end
end

# ResultsByTime exposes a dict-like interface; every method below forwards to the
# wrapped `data` field.
function Base.length(res::ResultsByTime)
    return length(res.data)
end

function Base.iterate(res::ResultsByTime)
    return iterate(res.data)
end

function Base.iterate(res::ResultsByTime, state)
    return iterate(res.data, state)
end

function Base.getindex(res::ResultsByTime, i)
    return getindex(res.data, i)
end

function Base.setindex!(res::ResultsByTime, v, i)
    return setindex!(res.data, v, i)
end

function Base.firstindex(res::ResultsByTime)
    return firstindex(res.data)
end

function Base.lastindex(res::ResultsByTime)
    return lastindex(res.data)
end

# Returns the column names stored with the results.
function get_column_names(x::ResultsByTime)
    return x.column_names
end

# For DenseAxisArray-backed results the row count is the length of the second axis.
function get_num_rows(::ResultsByTime{DenseAxisArray{Float64, 2}}, data)
    row_axis = axes(data)[2]
    return length(row_axis)
end

# For Matrix-backed results the row count is the size of the first dimension.
function get_num_rows(::ResultsByTime{Matrix{Float64}}, data)
    return first(size(data))
end

# Inserts a `DateTime` column as the first column of `df`. The column starts at
# `timestamp`, advances by `results.resolution`, and has one entry per row of `data` as
# reported by `get_num_rows`.
function _add_timestamps!(df::DataFrames.DataFrame, results::ResultsByTime, timestamp, data)
    num_rows = get_num_rows(results, data)
    timestamps = range(timestamp; step = results.resolution, length = num_rows)
    return DataFrames.insertcols!(df, 1, :DateTime => timestamps)
end

# Builds a DataFrame from the DenseAxisArray stored at `timestamp`, with a leading
# `DateTime` column added by `_add_timestamps!`.
function make_dataframe(
    results::ResultsByTime{DenseAxisArray{Float64, 2}},
    timestamp::Dates.DateTime,
)
    array = results.data[timestamp]
    # Swap the two dimensions so that entries of the first axis become DataFrame columns.
    swapped = permutedims(array.data)
    names_axis = axes(array)[1]
    df = DataFrames.DataFrame(swapped, names_axis)
    _add_timestamps!(df, results, timestamp, array)
    return df
end

# Builds a DataFrame from the Matrix stored at `timestamp`, naming its columns with
# `results.column_names` and adding a leading `DateTime` column via `_add_timestamps!`.
function make_dataframe(results::ResultsByTime{Matrix{Float64}}, timestamp::Dates.DateTime)
    matrix = results.data[timestamp]
    frame = DataFrames.DataFrame(matrix, results.column_names)
    _add_timestamps!(frame, results, timestamp, matrix)
    return frame
end

# Returns a SortedDict with one DataFrame per timestamp key in `results.data`, each built
# with `make_dataframe`.
function make_dataframes(results::ResultsByTime)
    timestamps = keys(results.data)
    return SortedDict(ts => make_dataframe(results, ts) for ts in timestamps)
end

# Pairs the list of result keys with a per-key cache of ResultsByTime.
struct ResultsByKeyAndTime
    "All keys stored in the model."
    result_keys::Vector{OptimizationContainerKey}
    "Results that have been read from the store and cached, indexed by key."
    cached_results::Dict{OptimizationContainerKey, ResultsByTime}
end

# Convenience constructor: collects `result_keys` into a vector and starts with an empty
# cache.
function ResultsByKeyAndTime(result_keys)
    empty_cache = Dict{OptimizationContainerKey, ResultsByTime}()
    return ResultsByKeyAndTime(collect(result_keys), empty_cache)
end

# Clears the cached results; `result_keys` is left untouched. Returns the result of
# `empty!` on the cache dictionary.
function Base.empty!(res::ResultsByKeyAndTime)
    return empty!(res.cached_results)
end
Loading

0 comments on commit 21f6dff

Please sign in to comment.