from __future__ import annotations

from typing import TYPE_CHECKING

# numpy is used at runtime (e.g. Model.run builds the cost scalar), so it must
# be a real import, not a TYPE_CHECKING-only one.
import numpy as np

from smash.solver._mwd_setup import SetupDT
from smash.solver._mwd_mesh import MeshDT
from smash.solver._mwd_input_data import Input_DataDT
from smash.solver._mwd_parameters import ParametersDT
from smash.solver._mwd_states import StatesDT
from smash.solver._mwd_output import OutputDT
from smash.solver._mw_forward import forward

from smash.core._constant import OPTIM_FUNC
from smash.core._build_model import (
    _parse_derived_type,
    _build_setup,
    _build_mesh,
    _build_input_data,
    _build_parameters,
)
from smash.core.simulation.multiple_run import _multiple_run
from smash.core.simulation.bayes_optimize import _bayes_computation
from smash.core.simulation._ann_optimize import _ann_optimize
from smash.core.simulation._standardize import (
    _standardize_multiple_run_args,
    _standardize_optimize_args,
    _standardize_optimize_options,
    _standardize_bayes_estimate_args,
    _standardize_bayes_optimize_args,
    _standardize_ann_optimize_args,
)
from smash.core._event_segmentation import (
    _event_segmentation,
    _standardize_event_seg_options,
)
from smash.core.signatures import (
    _standardize_signatures,
    _signatures,
    _signatures_sensitivity,
)
from smash.core.prcp_indices import _prcp_indices
from smash.core.generate_samples import (
    _get_bound_constraints,
    _standardize_problem,
    SampleResult,
)

if TYPE_CHECKING:
    import pandas as pd
    from smash.core.net import Net

__all__ = ["Model"]
[docs]class Model(object):
    """
    Primary data structure of the hydrological model `smash`.

    Parameters
    ----------
    setup : dict
        Model initialization setup dictionary (see: :ref:`setup arguments <user_guide.others.model_initialization.setup>`).

    mesh : dict
        Model initialization mesh dictionary. (see: :ref:`mesh arguments <user_guide.others.model_initialization.mesh>`).

    See Also
    --------
    save_setup: Save Model initialization setup dictionary.
    read_setup: Read Model initialization setup dictionary.
    save_mesh: Save Model initialization mesh dictionary.
    read_mesh: Read Model initialization mesh dictionary.
    generate_mesh: Automatic mesh generation.
    save_model: Save Model object.
    read_model: Read Model object.
    save_model_ddt: Save some derived data types of the Model object to HDF5 file.
    read_model_ddt: Read derived data types of the Model object from HDF5 file.

    Examples
    --------
    >>> setup, mesh = smash.load_dataset("cance")
    >>> model = smash.Model(setup, mesh)
    >>> model
    Structure: 'gr-a'
    Spatio-Temporal dimension: (x: 28, y: 28, time: 1440)
    Last update: Initialization
    """
def __init__(self, setup: dict, mesh: dict):
    # Passing falsy values for both arguments (e.g. Model(None, None)) builds
    # an empty shell; copy() relies on this and fills the attributes afterwards.
    if setup or mesh:
        # --- setup -------------------------------------------------------
        if not isinstance(setup, dict):
            raise TypeError(
                f"'setup' argument must be dictionary, not {type(setup)}"
            )

        descriptor_name = setup.get("descriptor_name", [])
        # A single descriptor may be given as a bare string.
        nd = 1 if isinstance(descriptor_name, str) else len(descriptor_name)

        self.setup = SetupDT(nd, mesh["ng"])
        _parse_derived_type(self.setup, setup)
        _build_setup(self.setup)

        # --- mesh --------------------------------------------------------
        if not isinstance(mesh, dict):
            raise TypeError(f"'mesh' argument must be dictionary, not {type(mesh)}")

        self.mesh = MeshDT(self.setup, mesh["nrow"], mesh["ncol"], mesh["ng"])
        _parse_derived_type(self.mesh, mesh)
        _build_mesh(self.setup, self.mesh)

        # --- derived data structures (order matters: each build step feeds
        # the next one) ----------------------------------------------------
        self.input_data = Input_DataDT(self.setup, self.mesh)
        _build_input_data(self.setup, self.mesh, self.input_data)

        self.parameters = ParametersDT(self.mesh)
        _build_parameters(self.setup, self.mesh, self.input_data, self.parameters)

        self.states = StatesDT(self.mesh)
        self.output = OutputDT(self.setup, self.mesh)

        self._last_update = "Initialization"
def __repr__(self):
    # Three-line summary: structure name, grid/time dimensions, last operation.
    parts = [
        f"Structure: '{self.setup.structure}'",
        f"Spatio-Temporal dimension: (x: {self.mesh.ncol}, y: {self.mesh.nrow}, time: {self.setup._ntime_step})",
        f"Last update: {self._last_update}",
    ]
    return "\n".join(parts)
@property
def setup(self):
    """
    The setup of the Model.

    The model setup is represented as a SetupDT object. See `SetupDT <smash.solver._mwd_setup.SetupDT>`.

    Examples
    --------
    >>> setup, mesh = smash.load_dataset("cance")
    >>> model = smash.Model(setup, mesh)

    If you are using IPython, tab completion allows you to visualize all the attributes and methods:

    >>> model.setup.<TAB>
    model.setup.copy()                  model.setup.prcp_directory
    model.setup.daily_interannual_pet   model.setup.prcp_format
    model.setup.descriptor_directory    model.setup.qobs_directory
    model.setup.descriptor_format       model.setup.read_descriptor
    model.setup.descriptor_name         model.setup.read_pet
    model.setup.dt                      model.setup.read_prcp
    model.setup.end_time                model.setup.read_qobs
    model.setup.from_handle(            model.setup.save_net_prcp_domain
    model.setup.mean_forcing            model.setup.save_qsim_domain
    model.setup.pet_conversion_factor   model.setup.sparse_storage
    model.setup.pet_directory           model.setup.start_time
    model.setup.pet_format              model.setup.structure
    model.setup.prcp_conversion_factor

    Notes
    -----
    This object is a wrapped derived type from `f90wrap <https://github.com/jameskermode/f90wrap>`__.
    """
    return self._setup

@setup.setter
def setup(self, value):
    # Reject anything that is not a wrapped SetupDT derived type.
    if not isinstance(value, SetupDT):
        raise TypeError(
            f"'setup' attribute must be set with {type(SetupDT())}, not {type(value)}"
        )
    self._setup = value
@property
def mesh(self):
    """
    The mesh of the Model.

    The model mesh is represented as a MeshDT object. See `MeshDT <smash.solver._mwd_mesh.MeshDT>`.

    Examples
    --------
    >>> setup, mesh = smash.load_dataset("cance")
    >>> model = smash.Model(setup, mesh)

    If you are using IPython, tab completion allows you to visualize all the attributes and methods:

    >>> model.mesh.<TAB>
    model.mesh.active_cell   model.mesh.gauge_pos
    model.mesh.area          model.mesh.nac
    model.mesh.code          model.mesh.ncol
    model.mesh.copy()        model.mesh.ng
    model.mesh.dx            model.mesh.nrow
    model.mesh.flwacc        model.mesh.path
    model.mesh.flwdir        model.mesh.xmin
    model.mesh.flwdst        model.mesh.ymax
    model.mesh.from_handle(

    Notes
    -----
    This object is a wrapped derived type from `f90wrap <https://github.com/jameskermode/f90wrap>`__.
    """
    return self._mesh

@mesh.setter
def mesh(self, value):
    # Reject anything that is not a wrapped MeshDT derived type.
    if not isinstance(value, MeshDT):
        raise TypeError(
            f"'mesh' attribute must be set with {type(MeshDT())}, not {type(value)}"
        )
    self._mesh = value
@property
def input_data(self):
    """
    The input data of the Model.

    The model input data is represented as a Input_DataDT object. See `Input_DataDT <smash.solver._mwd_input_data.Input_DataDT>`.

    Examples
    --------
    >>> setup, mesh = smash.load_dataset("cance")
    >>> model = smash.Model(setup, mesh)

    If you are using IPython, tab completion allows you to visualize all the attributes and methods:

    >>> model.input_data.<TAB>
    model.input_data.copy()        model.input_data.pet
    model.input_data.descriptor    model.input_data.prcp
    model.input_data.from_handle(  model.input_data.qobs
    model.input_data.mean_pet      model.input_data.sparse_pet
    model.input_data.mean_prcp     model.input_data.sparse_prcp

    Notes
    -----
    This object is a wrapped derived type from `f90wrap <https://github.com/jameskermode/f90wrap>`__.
    """
    return self._input_data

@input_data.setter
def input_data(self, value):
    # Reject anything that is not a wrapped Input_DataDT derived type.
    if not isinstance(value, Input_DataDT):
        raise TypeError(
            f"'input_data' attribute must be set with {type(Input_DataDT())}, not {type(value)}"
        )
    self._input_data = value
@property
def parameters(self):
    """
    The parameters of the Model.

    The model parameters is represented as a ParametersDT object. See `ParametersDT <smash.solver._mwd_parameters.ParametersDT>`.

    Examples
    --------
    >>> setup, mesh = smash.load_dataset("cance")
    >>> model = smash.Model(setup, mesh)

    If you are using IPython, tab completion allows you to visualize all the attributes and methods:

    >>> model.parameters.<TAB>
    model.parameters.alpha   model.parameters.cusl1
    model.parameters.b       model.parameters.cusl2
    model.parameters.beta    model.parameters.ds
    model.parameters.cft     model.parameters.dsm
    model.parameters.ci      model.parameters.exc
    model.parameters.clsl    model.parameters.from_handle(
    model.parameters.copy()  model.parameters.ks
    model.parameters.cp      model.parameters.lr
    model.parameters.cst     model.parameters.ws

    Notes
    -----
    This object is a wrapped derived type from `f90wrap <https://github.com/jameskermode/f90wrap>`__.
    """
    return self._parameters

@parameters.setter
def parameters(self, value):
    # Reject anything that is not a wrapped ParametersDT derived type.
    if not isinstance(value, ParametersDT):
        raise TypeError(
            f"'parameters' attribute must be set with {type(ParametersDT())}, not {type(value)}"
        )
    self._parameters = value
@property
def states(self):
    """
    The states of the Model.

    The model states is represented as a StatesDT object. See `StatesDT <smash.solver._mwd_states.StatesDT>`.

    Examples
    --------
    >>> setup, mesh = smash.load_dataset("cance")
    >>> model = smash.Model(setup, mesh)

    If you are using IPython, tab completion allows you to visualize all the attributes and methods:

    >>> model.states.<TAB>
    model.states.copy()        model.states.hlsl
    model.states.from_handle(  model.states.hp
    model.states.hft           model.states.hst
    model.states.hi            model.states.husl1
    model.states.hlr           model.states.husl2

    Notes
    -----
    This object is a wrapped derived type from `f90wrap <https://github.com/jameskermode/f90wrap>`__.
    """
    return self._states

@states.setter
def states(self, value):
    # Reject anything that is not a wrapped StatesDT derived type.
    if not isinstance(value, StatesDT):
        raise TypeError(
            f"'states' attribute must be set with {type(StatesDT())}, not {type(value)}"
        )
    self._states = value
@property
def output(self):
    """
    The output of the Model.

    The model output is represented as a OutputDT object. See `OutputDT <smash.solver._mwd_output.OutputDT>`.

    Examples
    --------
    >>> setup, mesh = smash.load_dataset("cance")
    >>> model = smash.Model(setup, mesh)

    If you are using IPython, tab completion allows you to visualize all the attributes and methods:

    >>> model.output.<TAB>
    model.output.copy()           model.output.qsim
    model.output.cost             model.output.qsim_domain
    model.output.from_handle(     model.output.sparse_net_prcp_domain
    model.output.fstates          model.output.sparse_qsim_domain
    model.output.net_prcp_domain

    Notes
    -----
    This object is a wrapped derived type from `f90wrap <https://github.com/jameskermode/f90wrap>`__.
    """
    return self._output

@output.setter
def output(self, value):
    # Reject anything that is not a wrapped OutputDT derived type.
    if not isinstance(value, OutputDT):
        raise TypeError(
            f"'output' attribute must be set with {type(OutputDT())}, not {type(value)}"
        )
    self._output = value
@property
def _last_update(self):
    # Human-readable tag naming the last operation applied to the Model
    # (e.g. "Initialization", "Forward Run", "SBS Optimization").
    return self.__last_update

@_last_update.setter
def _last_update(self, value):
    if not isinstance(value, str):
        raise TypeError(
            f"'_last_update' attribute must be set with {str}, not {type(value)}"
        )
    self.__last_update = value
def copy(self):
    """
    Make a deepcopy of the Model.

    Returns
    -------
    Model
        A copy of Model.

    Examples
    --------
    >>> setup, mesh = smash.load_dataset("cance")
    >>> model = smash.Model(setup, mesh)

    Create a pointer towards Model

    >>> model_ptr = model
    >>> model_ptr.parameters.cp = 1
    >>> model_ptr.parameters.cp[0,0], model.parameters.cp[0,0]
    (1.0, 1.0)

    Create a deepcopy of Model

    >>> model_dc = model.copy()
    >>> model_dc.parameters.cp = 200
    >>> model_dc.parameters.cp[0,0], model.parameters.cp[0,0]
    (200.0, 1.0)
    """
    # Build an empty shell (see __init__) and deep-copy every wrapped
    # derived type into it via its own .copy() method.
    new_instance = Model(None, None)
    for attr in ("setup", "mesh", "input_data", "parameters", "states", "output"):
        setattr(new_instance, attr, getattr(self, attr).copy())
    new_instance._last_update = self._last_update
    return new_instance
def run(self, inplace: bool = False):
    """
    Run the Model.

    Parameters
    ----------
    inplace : bool, default False
        If True, perform operation in-place.

    Returns
    -------
    Model : Model or None
        Model with run outputs if not **inplace**.

    Examples
    --------
    >>> setup, mesh = smash.load_dataset("cance")
    >>> model = smash.Model(setup, mesh)
    >>> model.run(inplace=True)
    >>> model
    Structure: 'gr-a'
    Spatio-Temporal dimension: (x: 28, y: 28, time: 1440)
    Last update: Forward Run

    Access to simulated discharge

    >>> model.output.qsim[0,:]
    array([1.9826449e-03, 1.3466686e-07, 6.7618025e-12, ..., 2.0916510e+01,
           2.0762346e+01, 2.0610489e+01], dtype=float32)
    """
    # BUGFIX: at module level numpy was only imported under TYPE_CHECKING,
    # so `np` was undefined when this method ran. Import locally to
    # guarantee availability at runtime regardless of the module header.
    import numpy as np

    if inplace:
        instance = self
    else:
        instance = self.copy()

    print("</> Run Model")

    # Scalar handed to the Fortran solver; the run results are written into
    # `instance.output` by `forward`.
    cost = np.float32(0)

    # The extra parameters/states copies are passed alongside the live ones —
    # presumably background/first-guess values for the solver; confirm
    # against the _mw_forward Fortran interface.
    forward(
        instance.setup,
        instance.mesh,
        instance.input_data,
        instance.parameters,
        instance.parameters.copy(),
        instance.states,
        instance.states.copy(),
        instance.output,
        cost,
    )

    instance._last_update = "Forward Run"

    if not inplace:
        return instance
def multiple_run(
    self,
    sample: SampleResult,
    jobs_fun: str | list | tuple = "nse",
    wjobs_fun: list | tuple | None = None,
    event_seg: dict | None = None,
    gauge: str | list | tuple = "downstream",
    wgauge: str | list | tuple = "mean",
    ost: str | pd.Timestamp | None = None,
    ncpu: int = 1,
    return_qsim: bool = False,
    verbose: bool = True,
):
    """
    Compute Multiple Run of Model.

    .. hint::
        See the :ref:`user_guide` for more.

    Parameters
    ----------
    sample : SampleResult
        An instance of the `SampleResult` object, which should be created using the `smash.generate_samples` method.

    jobs_fun, wjobs_fun, event_seg, gauge, wgauge, ost : multiple types
        Optimization setting to run the forward hydrological model and compute the cost values.
        See `smash.Model.optimize` for more.

    ncpu : integer, default 1
        If **ncpu** > 1, perform a parallel computation through the parameters set.

    return_qsim : bool, default False
        If True, also return the simulated discharge in the `MultipleRunResult` object.

    verbose : bool, default True
        Display information while computing.

    Returns
    -------
    res : MultipleRunResult
        The multiple run results represented as a `MultipleRunResult` object.

    See Also
    --------
    MultipleRunResult: Represents the multiple run result.
    SampleResult: Represents the generated sample result.

    Examples
    --------
    >>> setup, mesh = smash.load_dataset("cance")
    >>> model = smash.Model(setup, mesh)

    Get the boundary constraints of the Model parameters/states and generate a sample

    >>> problem = model.get_bound_constraints()
    >>> sample = smash.generate_samples(problem, n=200, random_state=99)

    Compute the multiple run

    >>> mtprr = model.multiple_run(sample, ncpu=4, return_qsim=True)

    Access the cost values of the direct simulations

    >>> mtprr.cost
    array([1.2327451e+00, 1.2367475e+00, 1.2227478e+00, 4.7788401e+00,
    ...
    1.2392160e+00, 1.2278881e+00, 7.5998020e-01, 1.1763511e+00],
    dtype=float32)

    Access the minimum cost value and the index

    >>> min_cost = np.min(mtprr.cost)
    >>> ind_min_cost = np.argmin(mtprr.cost)
    >>> min_cost, ind_min_cost
    (0.48862067, 20)

    Access the set that generated the minimum cost value

    >>> min_set = {key: sample[key][ind_min_cost] for key in problem["names"]}
    >>> min_set
    {'cp': 211.6867863776767, 'cft': 41.72451338560559, 'exc': -9.003870580641795, 'lr': 43.64397561764691}
    """
    # Multiple run never mutates self; always operate on a copy.
    instance = self.copy()

    print("</> Multiple Run Model")

    # % standardize args (note: `gauge` is consumed by the standardizer and
    # folded into `wgauge`).
    sample, jobs_fun, wjobs_fun, event_seg, wgauge, ost, ncpu = (
        _standardize_multiple_run_args(
            sample, jobs_fun, wjobs_fun, event_seg, gauge, wgauge, ost, ncpu, instance
        )
    )

    return _multiple_run(
        instance,
        sample,
        jobs_fun,
        wjobs_fun,
        event_seg,
        wgauge,
        ost,
        ncpu,
        return_qsim,
        verbose,
    )
def optimize(
    self,
    mapping: str = "uniform",
    algorithm: str | None = None,
    control_vector: str | list | tuple | None = None,
    bounds: dict | None = None,
    jobs_fun: str | list | tuple = "nse",
    wjobs_fun: list | tuple | None = None,
    event_seg: dict | None = None,
    gauge: str | list | tuple = "downstream",
    wgauge: str | list | tuple = "mean",
    ost: str | pd.Timestamp | None = None,
    options: dict | None = None,
    verbose: bool = True,
    inplace: bool = False,
):
    """
    Optimize the Model.

    .. hint::
        See the :ref:`user_guide` and :ref:`math_num_documentation` for more.

    Parameters
    ----------
    mapping : str, default 'uniform'
        Type of mapping. Should be one of

        - 'uniform'
        - 'distributed'
        - 'hyper-linear'
        - 'hyper-polynomial'

    algorithm : str or None, default None
        Type of algorithm. Should be one of

        - 'sbs'
        - 'nelder-mead'
        - 'l-bfgs-b'

        .. note::
            If not given, chosen to be one of 'sbs' or 'l-bfgs-b' depending on the optimization mapping.

    control_vector : str, sequence or None, default None
        Parameters and/or states to be optimized. The control vector argument
        can be any parameter or state name or any sequence of parameter and/or state names.

        .. note::
            If not given, the control vector will be composed of the parameters of the structure defined in the Model setup.

    bounds : dict or None, default None
        Bounds on control vector. The bounds argument is a dictionary where keys are the name of the
        parameters and/or states in the control vector (can be a subset of control vector sequence)
        and the values are pairs of ``(min, max)`` values (i.e. list or tuple) with ``min`` lower than ``max``.
        None value inside the dictionary will be filled in with default bound values.

        .. note::
            If not given, the bounds will be filled in with default bound values.

    jobs_fun : str or sequence, default 'nse'
        Type of objective function(s) to be minimized. Should be one or a sequence of any

        - 'nse', 'kge', 'kge2', 'se', 'rmse', 'logarithmic' (classical objective function)
        - 'Crc', 'Cfp2', 'Cfp10', 'Cfp50', 'Cfp90' (continuous signature)
        - 'Epf', 'Elt', 'Erc' (flood event signature)

        .. hint::
            See a detailed explanation on the objective function in :ref:`Math / Num Documentation <math_num_documentation.signal_analysis.cost_functions>` section.

    wjobs_fun : sequence or None, default None
        Objective function(s) weights in case of multi-criteria optimization (i.e. a sequence of objective functions to minimize).

        .. note::
            If not given, the cost value will be computed as the mean of the objective functions.

    event_seg : dict or None, default None
        A dictionary of event segmentation options when calculating flood event signatures for cost computation. The keys are

        - 'peak_quant'
        - 'max_duration'

        See `smash.Model.event_segmentation` for more.

        .. note::
            If not given in case flood signatures are computed, the default values will be set for these parameters.

    gauge : str, sequence, default 'downstream'
        Type of gauge to be optimized. There are two ways to specify it:

        1. A gauge code or any sequence of gauge codes.
           The gauge code(s) given must belong to the gauge codes defined in the Model mesh.

        2. An alias among 'all' and 'downstream'. 'all' is equivalent to a sequence of all gauge codes.
           'downstream' is equivalent to the gauge code of the most downstream gauge.

    wgauge : str, sequence, default 'mean'
        Type of gauge weights. There are two ways to specify it:

        1. A sequence of value whose size must be equal to the number of gauges optimized.

        2. An alias among 'mean', 'median', 'area' or 'minv_area'.

    ost : str, pandas.Timestamp or None, default None
        The optimization start time. The optimization will only be performed between the
        optimization start time and the end time. The value can be a str which can be interpreted by
        pandas.Timestamp `(see here) <https://pandas.pydata.org/docs/reference/api/pandas.Timestamp.html>`__.
        The ost date value must be between the start time and the end time defined in the Model setup.

        .. note::
            If not given, the optimization start time will be equal to the start time.

    options : dict or None, default None
        A dictionary of algorithm options. Depending on the algorithm, different options can be passed.

        .. hint::
            See each algorithm options:

            - 'sbs' :ref:`(see here) <api_reference.optimize_sbs>`
            - 'nelder-mead' :ref:`(see here) <api_reference.optimize_nelder-mead>`
            - 'l-bfgs-b' :ref:`(see here) <api_reference.optimize_l-bfgs-b>`

    verbose : bool, default True
        Display information while optimizing.

    inplace : bool, default False
        If True, perform operation in-place.

    Returns
    -------
    Model : Model or None
        Model with optimize outputs if not **inplace**.

    Examples
    --------
    >>> setup, mesh = smash.load_dataset("cance")
    >>> model = smash.Model(setup, mesh)
    >>> model.optimize(inplace=True)
    >>> model
    Structure: 'gr-a'
    Spatio-Temporal dimension: (x: 28, y: 28, time: 1440)
    Last update: SBS Optimization

    Access to simulated discharge

    >>> model.output.qsim[0,:]
    array([5.7140866e-04, 4.7018618e-04, 3.5345653e-04, ..., 1.9017689e+01,
           1.8781073e+01, 1.8549627e+01], dtype=float32)

    Access to optimized parameters

    >>> ind = tuple(model.mesh.gauge_pos[0,:])
    >>> ind
    (20, 27)
    >>> (
    ... "cp", model.parameters.cp[ind],
    ... "cft", model.parameters.cft[ind],
    ... "exc", model.parameters.exc[ind],
    ... "lr", model.parameters.lr[ind],
    ... )
    ('cp', 76.57858, 'cft', 263.64627, 'exc', -1.455813, 'lr', 30.859276)
    """
    instance = self if inplace else self.copy()

    print("</> Optimize Model")

    # % standardize args (`gauge` is consumed here and folded into `wgauge`).
    (
        mapping,
        algorithm,
        control_vector,
        jobs_fun,
        wjobs_fun,
        event_seg,
        bounds,
        wgauge,
        ost,
    ) = _standardize_optimize_args(
        mapping,
        algorithm,
        control_vector,
        jobs_fun,
        wjobs_fun,
        event_seg,
        bounds,
        gauge,
        wgauge,
        ost,
        instance,
    )

    options = _standardize_optimize_options(options, instance)

    # Dispatch to the algorithm-specific optimizer.
    res = OPTIM_FUNC[algorithm](
        instance,
        control_vector,
        mapping,
        jobs_fun,
        wjobs_fun,
        event_seg,
        bounds,
        wgauge,
        ost,
        verbose,
        **options,
    )

    instance._last_update = f"{algorithm.upper()} Optimization"

    # Some algorithms produce an extra result object; mirror the original
    # return combinations (None is returned implicitly when inplace and
    # there is no extra result).
    if res is None:
        if not inplace:
            return instance
    elif inplace:
        return res
    else:
        return instance, res
def bayes_estimate(
    self,
    sample: SampleResult | None = None,
    alpha: int | float | range | list | tuple | np.ndarray = 4,
    n: int = 1000,
    random_state: int | None = None,
    jobs_fun: str | list | tuple = "nse",
    wjobs_fun: list | tuple | None = None,
    event_seg: dict | None = None,
    gauge: str | list | tuple = "downstream",
    wgauge: str | list | tuple = "mean",
    ost: str | pd.Timestamp | None = None,
    ncpu: int = 1,
    verbose: bool = True,
    inplace: bool = False,
    return_br: bool = False,
):
    """
    Estimate prior Model parameters/states using Bayesian approach.

    .. hint::
        See the :ref:`user_guide` and :ref:`math_num_documentation` for more.

    Parameters
    ----------
    sample : SampleResult or None, default None
        An instance of the `SampleResult` object, which should be created using the `smash.generate_samples` method.

        .. note::
            If not given, the Model parameters samples will be generated automatically using the uniform generator
            based on the Model structure considered.

    alpha : int, float or sequence, default 4
        A regularization parameter that controls the decay rate of the likelihood function.

        .. note::
            If **alpha** is a sequence, then the L-curve approach will be used to find an optimal value for the regularization parameter.

    n : int, default 1000
        Number of generated samples. Only used if **sample** is not set.

    random_state : int or None, default None
        Random seed used to generate samples. Only used if **sample** is not set.

        .. note::
            If not given and **sample** is not set, generates the parameters set with a random seed.

    jobs_fun, wjobs_fun, event_seg, gauge, wgauge, ost : multiple types
        Optimization setting to run the forward hydrological model and compute the cost values.
        See `smash.Model.optimize` for more.

    ncpu : integer, default 1
        If **ncpu** > 1, perform a parallel computation through the parameters set.

    verbose : bool, default True
        Display information while estimating.

    inplace : bool, default False
        If True, perform operation in-place.

    return_br : bool, default False
        If True, also return the Bayesian estimation result `BayesResult`.

    Returns
    -------
    Model : Model or None
        Model with optimize outputs if not **inplace**.

    res : BayesResult
        The Bayesian estimation results represented as a `BayesResult` object if **return_br**.

    See Also
    --------
    BayesResult: Represents the Bayesian estimation or optimization result.
    SampleResult: Represents the generated sample result.

    Examples
    --------
    >>> setup, mesh = smash.load_dataset("cance")
    >>> model = smash.Model(setup, mesh)
    >>> br = model.bayes_estimate(n=200, inplace=True, return_br=True, random_state=99)

    Access to cost values of the direct simulations

    >>> cost = br.data["cost"]
    >>> cost.sort() # sort the values by ascending order
    >>> cost
    array([4.88620669e-01, 5.70850313e-01, 7.37333179e-01, 7.59980202e-01,
    ...
    9.26341797e+03, 1.12409111e+04, 1.13607480e+04, 1.18410371e+04])

    Compare to the cost value of the Model with the estimated parameters using Bayesian approach

    >>> model.output.cost
    0.41908782720565796
    """
    instance = self if inplace else self.copy()

    print("</> Bayes Estimate Model")

    # % standardize args (`n`/`random_state` drive sample generation when
    # `sample` is None; `gauge` is folded into `wgauge`).
    (
        sample,
        alpha,
        jobs_fun,
        wjobs_fun,
        event_seg,
        wgauge,
        ost,
        ncpu,
    ) = _standardize_bayes_estimate_args(
        sample,
        alpha,
        n,
        random_state,
        jobs_fun,
        wjobs_fun,
        event_seg,
        gauge,
        wgauge,
        ost,
        ncpu,
        instance,
    )

    # Pure estimation: no density options, no optimizer/mapping, no bounds
    # and no algorithm options — hence the None placeholders in the shared
    # Bayes driver call.
    res = _bayes_computation(
        instance,
        sample,
        alpha,
        None,
        None,
        None,
        None,
        jobs_fun,
        wjobs_fun,
        event_seg,
        None,
        wgauge,
        ost,
        verbose,
        None,
        ncpu,
    )

    instance._last_update = "Bayesian Estimation"

    # Mirror the original return combinations (implicit None when inplace
    # and return_br is False).
    if return_br:
        return res if inplace else (instance, res)
    if not inplace:
        return instance
def bayes_optimize(
    self,
    sample: SampleResult | None = None,
    alpha: int | float | range | list | tuple | np.ndarray = 4,
    n: int = 1000,
    random_state: int | None = None,
    de_bw_method: str | None = None,
    de_weights: np.ndarray | None = None,
    mapping: str = "uniform",
    algorithm: str | None = None,
    control_vector: str | list | tuple | None = None,
    bounds: list | tuple | None = None,
    jobs_fun: str | list | tuple = "nse",
    wjobs_fun: list | tuple | None = None,
    event_seg: dict | None = None,
    gauge: str | list | tuple = "downstream",
    wgauge: str | list | tuple = "mean",
    ost: str | pd.Timestamp | None = None,
    options: dict | None = None,
    ncpu: int = 1,
    verbose: bool = True,
    inplace: bool = False,
    return_br: bool = False,
):
    """
    Optimize the Model using Bayesian approach.

    .. hint::
        See the :ref:`user_guide` and :ref:`math_num_documentation` for more.

    Parameters
    ----------
    sample : SampleResult or None, default None
        An instance of the `SampleResult` object, which should be created using the `smash.generate_samples` method.

        .. note::
            If not given, the Model parameters samples will be generated automatically using the uniform generator
            based on both **control_vector** and **bounds** arguments.

    alpha : int, float or sequence, default 4
        A regularization parameter that controls the decay rate of the likelihood function.

        .. note::
            If **alpha** is a sequence, then the L-curve approach will be used to find an optimal value for the regularization parameter.

    n : int, default 1000
        Number of generated samples. Only used if **sample** is not set.

    random_state : int or None, default None
        Random seed used to generate samples. Only used if **sample** is not set.

        .. note::
            If not given and **sample** is not set, generates the parameters set with a random seed.

    de_bw_method : str, scalar, callable or None, default None
        The method used to calculate the estimator bandwidth to estimate the density distribution.
        This can be 'scott', 'silverman', a scalar constant or a callable.

        .. note::
            If not given, 'scott' is used as default.

        See `here <https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.gaussian_kde.html>`__ for more details.

    de_weights : array-like or None, default None
        A parameter related to weights of datapoints to estimate the density distribution.

        .. note::
            If not given, the samples are assumed to be equally weighted.

        See `here <https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.gaussian_kde.html>`__ for more details.

    mapping, algorithm, jobs_fun, wjobs_fun, event_seg, gauge, wgauge, ost, options : multiple types
        Optimization setting to optimize the Model using each generated spatially uniform parameters/states set as a first guess.
        See `smash.Model.optimize` for more.

    control_vector : str, sequence or None, default None
        Parameters and/or states to be optimized. The control vector argument
        can be any parameter or state name or any sequence of parameter and/or state names.

        .. note::
            If not given, the control vector will be composed of the parameters of the structure defined in the Model setup.

    bounds : dict or None, default None
        Bounds on control vector. The bounds argument is a dictionary where keys are the name of the
        parameters and/or states in the control vector (can be a subset of control vector sequence)
        and the values are pairs of ``(min, max)`` values (i.e. list or tuple) with ``min`` lower than ``max``.
        None value inside the dictionary will be filled in with default bound values.

        .. note::
            If not given, the bounds will be filled in with default bound values.

    ncpu : integer, default 1
        If **ncpu** > 1, perform a parallel computation through the parameters set.

    verbose : bool, default True
        Display information while optimizing.

    inplace : bool, default False
        If True, perform operation in-place.

    return_br : bool, default False
        If True, also return the Bayesian optimization result `BayesResult`.

    Returns
    -------
    Model : Model or None
        Model with optimize outputs if not **inplace**.

    res : BayesResult
        The Bayesian optimization results represented as a `BayesResult` object if **return_br**.

    See Also
    --------
    BayesResult: Represents the Bayesian estimation or optimization result.
    SampleResult: Represents the generated sample result.

    Examples
    --------
    >>> setup, mesh = smash.load_dataset("cance")
    >>> model = smash.Model(setup, mesh)
    >>> br = model.bayes_optimize(alpha=1.75, n=100, inplace=True, options={"maxiter": 2}, return_br=True, random_state=99)

    Access to cost values of the optimizations with different set of Model parameters

    >>> cost = br.data["cost"]
    >>> cost.sort() # sort the values by ascending order
    >>> cost
    array([0.04417887, 0.04713613, 0.05019313, 0.0512022 , 0.0563822 ,
    ...
    1.14625084, 1.15444124, 1.17005932, 1.19171917, 1.19486201])

    Compare to the cost value of the Model with the optimized parameters using Bayesian approach

    >>> model.output.cost
    0.04369253292679787
    """
    instance = self if inplace else self.copy()

    print("</> Bayes Optimize Model")

    # % standardize args (`n`/`random_state` drive sample generation when
    # `sample` is None; `control_vector` and `gauge` are consumed here).
    (
        sample,
        alpha,
        mapping,
        algorithm,
        jobs_fun,
        wjobs_fun,
        event_seg,
        bounds,
        wgauge,
        ost,
        ncpu,
    ) = _standardize_bayes_optimize_args(
        sample,
        alpha,
        n,
        random_state,
        mapping,
        algorithm,
        control_vector,
        jobs_fun,
        wjobs_fun,
        event_seg,
        bounds,
        gauge,
        wgauge,
        ost,
        ncpu,
        instance,
    )

    options = _standardize_optimize_options(options, instance)

    # Shared Bayes driver, this time with the density options and the full
    # optimizer configuration filled in.
    res = _bayes_computation(
        instance,
        sample,
        alpha,
        de_bw_method,
        de_weights,
        algorithm,
        mapping,
        jobs_fun,
        wjobs_fun,
        event_seg,
        bounds,
        wgauge,
        ost,
        verbose,
        options,
        ncpu,
    )

    instance._last_update = "Bayesian Optimization"

    # Mirror the original return combinations (implicit None when inplace
    # and return_br is False).
    if return_br:
        return res if inplace else (instance, res)
    if not inplace:
        return instance
def ann_optimize(
    self,
    net: Net | None = None,
    optimizer: str = "adam",
    learning_rate: float = 0.003,
    control_vector: str | list | tuple | None = None,
    bounds: list | tuple | None = None,
    jobs_fun: str | list | tuple = "nse",
    wjobs_fun: list | tuple | None = None,
    event_seg: dict | None = None,
    gauge: str | list | tuple = "downstream",
    wgauge: str | list | tuple = "mean",
    ost: str | pd.Timestamp | None = None,
    epochs: int = 400,
    early_stopping: bool = False,
    random_state: int | None = None,
    verbose: bool = True,
    inplace: bool = False,
    return_net: bool = False,
):
    """
    Optimize the Model using an Artificial Neural Network.

    The network learns the descriptors-to-parameters mapping; the trained
    mapping is then used to produce the optimized Model parameters/states.

    .. hint::
        See the :ref:`user_guide` and :ref:`math_num_documentation` for more.

    Parameters
    ----------
    net : Net or None, default None
        The `Net` object to be trained. If not given, a default network is
        built internally; otherwise training is performed in-place on **net**.
    optimizer : str, default 'adam'
        Name of the optimizer ('sgd', 'adam', 'adagrad' or 'rmsprop').
        Only used if **net** is not set.
    learning_rate : float, default 0.003
        Learning rate used to update the weights during training.
        Only used if **net** is not set.
    control_vector : str, sequence or None, default None
        Parameters and/or states to be optimized. If not given, it is
        composed of the parameters of the structure defined in the Model
        setup.
    bounds : dict or None, default None
        Bounds on the control vector: keys are parameter/state names (may be
        a subset of the control vector) and values are ``(min, max)`` pairs
        with ``min`` lower than ``max``. Missing or None entries are filled
        in with default bound values.
        NOTE(review): the annotation says ``list | tuple`` but the documented
        contract is a dict — confirm against ``_standardize_ann_optimize_args``.
    jobs_fun, wjobs_fun, event_seg, gauge, wgauge, ost : multiple types
        Optimization settings used to run the forward hydrological model and
        compute the cost values. See `smash.Model.optimize` for more.
    epochs : int, default 400
        Number of training epochs.
    early_stopping : bool, default False
        Stop updating weights and biases when the loss stops decreasing.
    random_state : int or None, default None
        Random seed used to initialize weights. Only used if **net** is not
        set; if not given, a random seed is used.
    verbose : bool, default True
        Display information while training.
    inplace : bool, default False
        If True, perform the operation in-place on this Model.
    return_net : bool, default False
        If True and the default graph is used (**net** not set), also return
        the trained network.

    Returns
    -------
    Model : Model or None
        Model with optimized outputs if not **inplace**.
    Net : Net or None
        `Net` with trained weights and biases if **return_net** and the
        default graph is used.

    See Also
    --------
    Net : Artificial Neural Network initialization.

    Examples
    --------
    >>> setup, mesh = smash.load_dataset("cance")
    >>> model = smash.Model(setup, mesh)
    >>> net = model.ann_optimize(epochs=200, inplace=True, return_net=True, random_state=11)
    >>> net.history['loss_train']  # training loss per epoch
    [1.2064831256866455, ..., 0.03552241995930672]
    """
    # Operate on self when in-place, otherwise on an independent copy.
    instance = self if inplace else self.copy()

    print("</> ANN Optimize Model")

    # A user-supplied network is trained in-place and never returned via
    # return_net; only the internally-built default graph is.
    use_default_graph = net is None

    # % standardize args
    (
        control_vector,
        jobs_fun,
        wjobs_fun,
        event_seg,
        bounds,
        wgauge,
        ost,
    ) = _standardize_ann_optimize_args(
        control_vector,
        jobs_fun,
        wjobs_fun,
        event_seg,
        bounds,
        gauge,
        wgauge,
        ost,
        instance,
    )

    net = _ann_optimize(
        instance,
        control_vector,
        jobs_fun,
        wjobs_fun,
        event_seg,
        bounds,
        wgauge,
        ost,
        net,
        optimizer,
        learning_rate,
        epochs,
        early_stopping,
        random_state,
        verbose,
    )

    instance._last_update = "ANN Optimization"

    if return_net and use_default_graph:
        # Both the (new) Model and the trained net, or just the net in-place.
        return net if inplace else (instance, net)

    if not inplace:
        return instance
def event_segmentation(self, peak_quant: float = 0.995, max_duration: float = 240):
    """
    Compute segmentation information of flood events over all catchments of the Model.

    .. hint::
        See the :ref:`User Guide <user_guide.in_depth.event_segmentation>` and
        :ref:`Math / Num Documentation <math_num_documentation.signal_analysis.hydrograph_segmentation>`
        for more.

    Parameters
    ----------
    peak_quant : float, default 0.995
        Events are selected if their discharge peaks exceed the
        **peak_quant**-quantile of the observed discharge timeseries.
    max_duration : float, default 240
        Expected maximum duration of an event (in hours). If multiple events
        are detected, their duration may exceed this value.

    Returns
    -------
    res : pandas.DataFrame
        Flood events information obtained from the segmentation algorithm.
        The dataframe has 6 columns:

        - 'code' : the catchment code.
        - 'start' : the beginning of the event.
        - 'end' : the end of the event.
        - 'maxrainfall' : the moment the maximum precipitation is observed.
        - 'flood' : the moment the maximum discharge is observed.
        - 'season' : the season in which the event occurs.

    Examples
    --------
    >>> setup, mesh = smash.load_dataset("cance")
    >>> model = smash.Model(setup, mesh)

    Perform the segmentation algorithm and display flood events information:

    >>> res = model.event_segmentation()
    >>> res
           code               start               flood  season
    0  V3524010 2014-11-03 03:00:00 ... 2014-11-04 19:00:00  autumn
    1  V3515010 2014-11-03 10:00:00 ... 2014-11-04 20:00:00  autumn
    2  V3517010 2014-11-03 08:00:00 ... 2014-11-04 16:00:00  autumn
    [3 rows x 6 columns]
    """
    print("</> Model Event Segmentation")

    # Delegate to the shared segmentation routine.
    return _event_segmentation(self, peak_quant, max_duration)
def signatures(
    self,
    sign: str | list | None = None,
    obs_comp: bool = True,
    event_seg: dict | None = None,
):
    """
    Compute continuous or/and flood event signatures of the Model.

    .. hint::
        See the :ref:`User Guide <user_guide.in_depth.signatures.computation>` and
        :ref:`Math / Num Documentation <math_num_documentation.signal_analysis.hydrological_signatures>`
        for more.

    Parameters
    ----------
    sign : str, list of str or None, default None
        Signature(s) to compute. Should be one of

        - 'Crc', 'Crchf', 'Crclf', 'Crch2r', 'Cfp2', 'Cfp10', 'Cfp50', 'Cfp90' (continuous signatures)
        - 'Eff', 'Ebf', 'Erc', 'Erchf', 'Erclf', 'Erch2r', 'Elt', 'Epf' (flood event signatures)

        If not given, all continuous and flood event signatures are computed.
    obs_comp : bool, default True
        If True, also compute the signatures from observed discharge in
        addition to simulated discharge.
    event_seg : dict or None, default None
        Event segmentation options used when calculating flood event
        signatures. The keys are 'peak_quant' and 'max_duration'; see
        `smash.Model.event_segmentation` for more. If not given, default
        values are used.

    Returns
    -------
    res : SignResult
        The signatures computation results represented as a `SignResult` object.

    See Also
    --------
    SignResult: Represents signatures computation result.

    Examples
    --------
    >>> setup, mesh = smash.load_dataset("cance")
    >>> model = smash.Model(setup, mesh)
    >>> model.run(inplace=True)
    >>> res = model.signatures()
    >>> res.cont["obs"]  # observed continuous signatures
           code       Crc     Crchf  ...   Cfp50      Cfp90
    0  V3524010  0.516207  0.191349  ...  3.3225  42.631802
    1  V3515010  0.509180  0.147217  ...  1.5755  10.628400
    2  V3517010  0.514302  0.148364  ...  0.3235   2.776700
    [3 rows x 9 columns]
    """
    print("</> Model Signatures")

    # Split requested signatures into continuous / event sets and
    # normalize the segmentation options before delegating.
    cont_sign, event_sign = _standardize_signatures(sign)
    seg_options = _standardize_event_seg_options(event_seg)

    return _signatures(self, cont_sign, event_sign, obs_comp, **seg_options)
def signatures_sensitivity(
    self,
    problem: dict | None = None,
    n: int = 64,
    sign: str | list[str] | None = None,
    event_seg: dict | None = None,
    random_state: int | None = None,
):
    """
    Compute the first- and total-order variance-based sensitivity (Sobol indices)
    of spatially uniform hydrological model parameters on the output signatures.

    .. hint::
        See the :ref:`User Guide <user_guide.in_depth.signatures.sensitivity>` and
        :ref:`Math / Num Documentation <math_num_documentation.signal_analysis.hydrological_signatures>`
        for more.

    Parameters
    ----------
    problem : dict or None, default None
        Problem definition used to generate multiple sets of spatially
        uniform Model parameters. The keys are

        - 'num_vars' : the number of Model parameters.
        - 'names' : the names of Model parameters.
        - 'bounds' : the upper and lower bounds of each Model parameter
          (a sequence of ``(min, max)``).

        If not given, the problem is set automatically using the
        `smash.Model.get_bound_constraints` method.
    n : int, default 64
        Number of trajectories to generate for each model parameter (ideally
        a power of 2). The number of samples generated equals :math:`N(D+2)`
        where :math:`D` is the number of model parameters. See
        `here <https://salib.readthedocs.io/en/latest/api/SALib.sample.html#SALib.sample.sobol.sample>`__
        for more details.
    sign : str, list or None, default None
        Signature(s) to compute. Should be one of

        - 'Crc', 'Crchf', 'Crclf', 'Crch2r', 'Cfp2', 'Cfp10', 'Cfp50', 'Cfp90' (continuous signatures)
        - 'Eff', 'Ebf', 'Erc', 'Erchf', 'Erclf', 'Erch2r', 'Elt', 'Epf' (flood event signatures)

        If not given, all continuous and flood event signatures are computed.
    event_seg : dict or None, default None
        Event segmentation options used when calculating flood event
        signatures. The keys are 'peak_quant' and 'max_duration'; see
        `smash.Model.event_segmentation` for more. If not given, default
        values are used.
    random_state : int or None, default None
        Random seed used to generate samples for the sensitivity
        computation. If not given, a random seed is used.

    Returns
    -------
    res : SignSensResult
        The signatures sensitivity computation results represented as a
        `SignSensResult` object.

    See Also
    --------
    SignSensResult: Represents signatures sensitivity computation result.

    Examples
    --------
    >>> setup, mesh = smash.load_dataset("cance")
    >>> model = smash.Model(setup, mesh)
    >>> res = model.signatures_sensitivity(random_state=99)
    >>> res.cont["total_si"]["cp"]  # total-order indices of `cp`
           code       Crc     Crchf  ...     Cfp50     Cfp90
    0  V3524010  0.089630  0.023528  ...  1.092226  0.009049
    1  V3515010  0.064792  0.023358  ...  0.353862  0.011404
    2  V3517010  0.030655  0.028925  ...  0.113354  0.010977
    [3 rows x 9 columns]
    """
    print("</> Model Signatures Sensitivity")

    # Work on a copy so the sensitivity runs never mutate this Model.
    instance = self.copy()

    cont_sign, event_sign = _standardize_signatures(sign)
    problem = _standardize_problem(problem, instance.setup, False)
    seg_options = _standardize_event_seg_options(event_seg)

    return _signatures_sensitivity(
        instance, problem, n, cont_sign, event_sign, random_state, **seg_options
    )
def prcp_indices(self):
    """
    Compute precipitation indices of the Model.

    Four precipitation indices are calculated for each gauge and each time step:

    - ``std`` : The precipitation spatial standard deviation,
    - ``d1`` : The first scaled moment, :cite:p:`zocatelli_2011`
    - ``d2`` : The second scaled moment, :cite:p:`zocatelli_2011`
    - ``vg`` : The vertical gap. :cite:p:`emmanuel_2015`

    .. hint::
        See the :ref:`User Guide <user_guide.in_depth.prcp_indices>` for more.

    Returns
    -------
    res : PrcpIndicesResult
        The precipitation indices results represented as a
        `PrcpIndicesResult` object. Each attribute is a numpy.ndarray of
        shape (number of gauges, number of time steps); NaN means no
        precipitation at that gauge and time step.

    See Also
    --------
    PrcpIndicesResult: Represents the precipitation indices result.

    Examples
    --------
    >>> setup, mesh = smash.load_dataset("cance")
    >>> model = smash.Model(setup, mesh)
    >>> res = model.prcp_indices()
    >>> res.d1.shape
    (3, 1440)

    Find the time steps where indices were calculated on the most
    downstream gauge and view the first one:

    >>> ind = np.argwhere(~np.isnan(res.d1[0,:])).squeeze()
    >>> res.d1[0, ind[0]]
    1.209175
    """
    print("</> Model Precipitation Indices")

    # Delegate to the shared computation routine.
    return _prcp_indices(self)
def get_bound_constraints(self, states: bool = False):
    """
    Get the boundary constraints of the Model parameters/states.

    Parameters
    ----------
    states : bool, default False
        If True, return boundary constraints of the Model states instead of
        Model parameters.

    Returns
    -------
    problem : dict
        The boundary constraint problem of the Model parameters/states.
        The keys are

        - 'num_vars': The number of Model parameters/states.
        - 'names': The names of Model parameters/states.
        - 'bounds': The upper and lower bounds of each Model parameter/state
          (a sequence of ``(min, max)``).

    Examples
    --------
    >>> setup, mesh = smash.load_dataset("cance")
    >>> model = smash.Model(setup, mesh)
    >>> problem = model.get_bound_constraints()
    >>> problem
    {
        'num_vars': 4,
        'names': ['cp', 'cft', 'exc', 'lr'],
        'bounds': [[1e-06, 1000.0], [1e-06, 1000.0], [-50.0, 50.0], [1e-06, 1000.0]]
    }
    """
    # Fix: the docstring previously claimed `states` defaults to True,
    # contradicting the actual signature default (False).
    return _get_bound_constraints(self.setup, states)