openscm_calibration.calibration_demo#

Classes and functions used in our calibration demo notebook

See docs/how-to-guides/how-to-run-a-calibration.py.

These are in their own module to help the parallelisation (local functions and classes don't pickle well), but also just for clarity in the documentation.

Classes:

Name	Description
`CostCalculator`	Calculate the cost function for a given set of model results
`ExperimentResult`	Results of an experiment
`ExperimentResultCollection`	Collection of results from one or more experiments
`Timeseries`	Timeseries container

Functions:

Name	Description
`add_iteration_info`	Add iteration info to a result
`convert_results_to_plot_dict`	Convert results into a dictionary, grouped for plotting
`get_timeseries`	Get timeseries from results
`plot_timeseries`	Plot timeseries

CostCalculator #

Calculate the cost function for a given set of model results

Methods:

Name	Description
`calculate_cost`	Calculate cost function
`calculate_negative_log_likelihood`	Calculate the negative log likelihood of a given set of results

Attributes:

Name	Type	Description
`normalisation`	`Quantity`	The normalisation to apply to the difference between model results and the target
`target`	`ExperimentResultCollection`	The target to which we are calibrating

Source code in src/openscm_calibration/calibration_demo.py

@define
class CostCalculator:
    """Calculate the cost function for a given set of model results"""

    target: ExperimentResultCollection
    """The target to which we are calibrating"""

    normalisation: pint.registry.UnitRegistry.Quantity
    """
    The normalisation to apply to the difference between model results and the target
    """

    def calculate_cost(
        self,
        model_results: ExperimentResultCollection,
    ) -> float:
        """
        Calculate cost function

        Parameters
        ----------
        model_results
            Model results for which to calculate the cost

        Returns
        -------
        :
            Cost function value
        """
        model_result_dict = model_results.to_dict()
        target_dict = self.target.to_dict()

        sses = 0.0
        for experiment_id in model_result_dict:
            model_result_exp = model_result_dict[experiment_id]
            target_exp = target_dict[experiment_id]

            if not np.isclose(
                model_result_exp.time,
                target_exp.time,
            ).all():
                msg = "Time axes differ"
                raise NotImplementedError(msg)

            diff_normalised = (
                model_result_exp.values - target_exp.values
            ) / self.normalisation
            diff_squared = diff_normalised**2

            sses += diff_squared.sum()

        return sses

    def calculate_negative_log_likelihood(
        self,
        model_results: ExperimentResultCollection,
    ) -> float:
        """
        Calculate the negative log likelihood of a given set of results

        Parameters
        ----------
        model_results
            Model results for which to calculate the negative log likelihood

        Returns
        -------
        :
            Negative log likelihood (up to an additive constant)
        """
        sses = self.calculate_cost(model_results)
        # TODO: find the proof of this
        negative_log_likelihood = -sses / 2

        return negative_log_likelihood

normalisation `instance-attribute` #

normalisation: Quantity

The normalisation to apply to the difference between model results and the target

target `instance-attribute` #

target: ExperimentResultCollection

The target to which we are calibrating

calculate_cost #

calculate_cost(
    model_results: ExperimentResultCollection,
) -> float

Calculate cost function

Parameters:

Name	Type	Description	Default
`model_results`	`ExperimentResultCollection`	Model results for which to calculate the cost	required

Returns:

Type	Description
`float`	Cost function value

Source code in src/openscm_calibration/calibration_demo.py

def calculate_cost(
    self,
    model_results: ExperimentResultCollection,
) -> float:
    """
    Calculate cost function

    Parameters
    ----------
    model_results
        Model results for which to calculate the cost

    Returns
    -------
    :
        Cost function value
    """
    model_result_dict = model_results.to_dict()
    target_dict = self.target.to_dict()

    sses = 0.0
    for experiment_id in model_result_dict:
        model_result_exp = model_result_dict[experiment_id]
        target_exp = target_dict[experiment_id]

        if not np.isclose(
            model_result_exp.time,
            target_exp.time,
        ).all():
            msg = "Time axes differ"
            raise NotImplementedError(msg)

        diff_normalised = (
            model_result_exp.values - target_exp.values
        ) / self.normalisation
        diff_squared = diff_normalised**2

        sses += diff_squared.sum()

    return sses

calculate_negative_log_likelihood #

calculate_negative_log_likelihood(
    model_results: ExperimentResultCollection,
) -> float

Calculate the negative log likelihood of a given set of results

Parameters:

Name	Type	Description	Default
`model_results`	`ExperimentResultCollection`	Model results for which to calculate the negative log likelihood	required

Returns:

Type	Description
`float`	Negative log likelihood (up to an additive constant)

Source code in src/openscm_calibration/calibration_demo.py

def calculate_negative_log_likelihood(
    self,
    model_results: ExperimentResultCollection,
) -> float:
    """
    Calculate the negative log likelihood of a given set of results

    Parameters
    ----------
    model_results
        Model results for which to calculate the negative log likelihood

    Returns
    -------
    :
        Negative log likelihood (up to an additive constant)
    """
    sses = self.calculate_cost(model_results)
    # TODO: find the proof of this
    negative_log_likelihood = -sses / 2

    return negative_log_likelihood

ExperimentResult #

Results of an experiment

Attributes:

Name	Type	Description
`experiment_id`	`str`	ID of the experiment
`result`	`Timeseries`	The position of the mass over time in the experiment

Source code in src/openscm_calibration/calibration_demo.py

@define
class ExperimentResult:
    """Results of an experiment"""

    experiment_id: str
    """ID of the experiment"""

    result: Timeseries
    """The position of the mass over time in the experiment"""

experiment_id `instance-attribute` #

experiment_id: str

ID of the experiment

result `instance-attribute` #

result: Timeseries

The position of the mass over time in the experiment

ExperimentResultCollection #

Collection of results from one or more experiments

Methods:

Name	Description
`lineplot`	Make a line plot
`to_dict`	Convert to a dictionary
`to_timeseries`	Convert self to timeseries

Attributes:

Name	Type	Description
`iteration`	`int \| None`	Optimisation iteration in which these results were generated
`results`	`tuple[ExperimentResult, ...]`	Results of the experiments

Source code in src/openscm_calibration/calibration_demo.py

@define
class ExperimentResultCollection:
    """Collection of results from one or more experiments"""

    results: tuple[ExperimentResult, ...]
    """Results of the experiments"""

    iteration: int | None = None
    """Optimisation iteration in which these results were generated"""

    def to_dict(self) -> dict[str, Timeseries]:
        """Convert to a dictionary"""
        return {r.experiment_id: r.result for r in self.results}

    def to_timeseries(
        self,
        value_unit: str | None = None,
        time_unit: str = "yr",
    ) -> pd.DataFrame:
        """
        Convert self to timeseries

        In OpenSCM Calibration, timeseries means that the time axis is the index

        Parameters
        ----------
        value_unit
            Unit to use for the values.

            If not supplied, no conversion is performed.

        time_unit
            Unit to use for the time axis

        Returns
        -------
        :
            'Timeseries' view of the data in `self`
        """
        try:
            import pandas as pd
        except ImportError as exc:
            raise MissingOptionalDependencyError(
                "to_timeseries", requirement="pandas"
            ) from exc

        ts_l = []
        for result in self.results:
            values = result.result.values
            if value_unit is not None:
                # pint weirdness causing type hinting issues
                values = values.to(value_unit)  # type: ignore

            df_r = pd.DataFrame(
                values.m,
                columns=pd.MultiIndex.from_tuples(
                    tuples=[(result.experiment_id, values.u)],
                    names=["experiment_id", "unit"],
                ),
                index=result.result.time.to(time_unit).m,
            )
            ts_l.append(df_r)

        ts = pd.concat(ts_l, axis="columns")

        return ts

    def lineplot(
        self,
        ax: matplotlib.axes.Axes,
        x_units: str = "yr",
        y_units: str = "m",
    ) -> matplotlib.axes.Axes:
        """
        Make a line plot

        Parameters
        ----------
        ax
            Axes on which to plot

        x_units
            Units to use for the x-axis

        y_units
            Units to use for the y-axis

        Returns
        -------
        :
            Axes on which the plot was made
        """
        for result in self.results:
            ax.plot(
                result.result.time.to(x_units).m,
                result.result.values.to(y_units).m,
                label=result.experiment_id,
            )

        ax.set_ylabel(f"[{y_units}]")
        ax.set_xlabel(f"[{x_units}]")

        return ax

iteration `class-attribute` `instance-attribute` #

iteration: int | None = None

Optimisation iteration in which these results were generated

results `instance-attribute` #

results: tuple[ExperimentResult, ...]

Results of the experiments

lineplot #

lineplot(
    ax: Axes, x_units: str = "yr", y_units: str = "m"
) -> Axes

Make a line plot

Parameters:

Name	Type	Description	Default
`ax`	`Axes`	Axes on which to plot	required
`x_units`	`str`	Units to use for the x-axis	`'yr'`
`y_units`	`str`	Units to use for the y-axis	`'m'`

Returns:

Type	Description
`Axes`	Axes on which the plot was made

Source code in src/openscm_calibration/calibration_demo.py

def lineplot(
    self,
    ax: matplotlib.axes.Axes,
    x_units: str = "yr",
    y_units: str = "m",
) -> matplotlib.axes.Axes:
    """
    Make a line plot

    Parameters
    ----------
    ax
        Axes on which to plot

    x_units
        Units to use for the x-axis

    y_units
        Units to use for the y-axis

    Returns
    -------
    :
        Axes on which the plot was made
    """
    for result in self.results:
        ax.plot(
            result.result.time.to(x_units).m,
            result.result.values.to(y_units).m,
            label=result.experiment_id,
        )

    ax.set_ylabel(f"[{y_units}]")
    ax.set_xlabel(f"[{x_units}]")

    return ax

to_dict #

to_dict() -> dict[str, Timeseries]

Convert to a dictionary

Source code in src/openscm_calibration/calibration_demo.py

def to_dict(self) -> dict[str, Timeseries]:
    """Convert to a dictionary"""
    return {r.experiment_id: r.result for r in self.results}

to_timeseries #

to_timeseries(
    value_unit: str | None = None, time_unit: str = "yr"
) -> DataFrame

Convert self to timeseries

In OpenSCM Calibration, timeseries means that the time axis is the index

Parameters:

Name	Type	Description	Default
`value_unit`	`str \| None`	Unit to use for the values. If not supplied, no conversion is performed.	`None`
`time_unit`	`str`	Unit to use for the time axis	`'yr'`

Returns:

Type	Description
`DataFrame`	'Timeseries' view of the data in `self`

Source code in src/openscm_calibration/calibration_demo.py

def to_timeseries(
    self,
    value_unit: str | None = None,
    time_unit: str = "yr",
) -> pd.DataFrame:
    """
    Convert self to timeseries

    In OpenSCM Calibration, timeseries means that the time axis is the index

    Parameters
    ----------
    value_unit
        Unit to use for the values.

        If not supplied, no conversion is performed.

    time_unit
        Unit to use for the time axis

    Returns
    -------
    :
        'Timeseries' view of the data in `self`
    """
    try:
        import pandas as pd
    except ImportError as exc:
        raise MissingOptionalDependencyError(
            "to_timeseries", requirement="pandas"
        ) from exc

    ts_l = []
    for result in self.results:
        values = result.result.values
        if value_unit is not None:
            # pint weirdness causing type hinting issues
            values = values.to(value_unit)  # type: ignore

        df_r = pd.DataFrame(
            values.m,
            columns=pd.MultiIndex.from_tuples(
                tuples=[(result.experiment_id, values.u)],
                names=["experiment_id", "unit"],
            ),
            index=result.result.time.to(time_unit).m,
        )
        ts_l.append(df_r)

    ts = pd.concat(ts_l, axis="columns")

    return ts

Timeseries #

Timeseries container

Attributes:

Name	Type	Description
`time`	`Quantity`	The time axis
`values`	`Quantity`	The timeseries value at each point in time

Source code in src/openscm_calibration/calibration_demo.py

@define
class Timeseries:
    """Timeseries container"""

    time: pint.registry.UnitRegistry.Quantity
    """The time axis"""

    values: pint.registry.UnitRegistry.Quantity
    """The timeseries value at each point in time"""

time `instance-attribute` #

time: Quantity

The time axis

values `instance-attribute` #

values: Quantity

The timeseries value at each point in time

add_iteration_info #

add_iteration_info(
    res: ExperimentResultCollection, iteration: int
) -> ExperimentResultCollection

Add iteration info to a result

Source code in src/openscm_calibration/calibration_demo.py

def add_iteration_info(
    res: ExperimentResultCollection, iteration: int
) -> ExperimentResultCollection:
    """Add iteration info to a result"""
    res.iteration = iteration

    return res

convert_results_to_plot_dict #

convert_results_to_plot_dict(
    res: ExperimentResultCollection,
) -> dict[str, ExperimentResultCollection]

Convert results into a dictionary, grouped for plotting

Source code in src/openscm_calibration/calibration_demo.py

def convert_results_to_plot_dict(
    res: ExperimentResultCollection,
) -> dict[str, ExperimentResultCollection]:
    """Convert results into a dictionary, grouped for plotting"""
    return {
        k: ExperimentResultCollection(
            results=(
                ExperimentResult(
                    experiment_id=k,
                    result=v,
                ),
            ),
        )
        for k, v in res.to_dict().items()
    }

get_timeseries #

get_timeseries(
    res: ExperimentResultCollection,
    value_unit: str | None = None,
) -> DataFrame

Get timeseries from results

Source code in src/openscm_calibration/calibration_demo.py

def get_timeseries(
    res: ExperimentResultCollection,
    value_unit: str | None = None,
) -> pd.DataFrame:
    """
    Get timeseries from results
    """
    return res.to_timeseries()

plot_timeseries #

plot_timeseries(
    best_run: ExperimentResultCollection,
    others_to_plot: tuple[ExperimentResultCollection, ...],
    target: ExperimentResultCollection,
    convert_results_to_plot_dict: Callable[
        [ExperimentResultCollection],
        dict[str, ExperimentResultCollection],
    ],
    timeseries_keys: Iterable[str],
    axes: dict[str, Axes],
    get_timeseries: Callable[
        [ExperimentResultCollection], DataFrame
    ],
) -> None

Plot timeseries

Parameters:

Name	Type	Description	Default
`best_run`	`ExperimentResultCollection`	Best run from iterations	required
`others_to_plot`	`tuple[ExperimentResultCollection, ...]`	Other results to plot from iterations	required
`target`	`ExperimentResultCollection`	Target to which we are calibrating	required
`convert_results_to_plot_dict`	`Callable[[ExperimentResultCollection], dict[str, ExperimentResultCollection]]`	Callable which converts the data into a dictionary in which the keys are a subset of the values in `axes`	required
`timeseries_keys`	`Iterable[str]`	Keys of the timeseries to plot	required
`axes`	`dict[str, Axes]`	Axes on which to plot	required
`get_timeseries`	`Callable[[ExperimentResultCollection], DataFrame]`	Function which converts the data into a [`pandas.DataFrame`][`pandas.DataFrame`] for plotting	required

Source code in src/openscm_calibration/calibration_demo.py

def plot_timeseries(  # noqa: PLR0913
    best_run: ExperimentResultCollection,
    others_to_plot: tuple[ExperimentResultCollection, ...],
    target: ExperimentResultCollection,
    convert_results_to_plot_dict: Callable[
        [ExperimentResultCollection], dict[str, ExperimentResultCollection]
    ],
    timeseries_keys: Iterable[str],
    axes: dict[str, matplotlib.axes.Axes],
    get_timeseries: Callable[[ExperimentResultCollection], pd.DataFrame],
) -> None:
    """
    Plot timeseries

    Parameters
    ----------
    best_run
        Best run from iterations

    others_to_plot
        Other results to plot from iterations

    target
        Target to which we are calibrating

    convert_results_to_plot_dict
        Callable which converts the data into a dictionary
        in which the keys are a subset of the values in `axes`

    timeseries_keys
        Keys of the timeseries to plot

    axes
        Axes on which to plot

    get_timeseries
        Function which converts the data into a
        [`pandas.DataFrame`][] for plotting
    """
    others_to_plot_dict = defaultdict(list)
    for res in others_to_plot:
        res_dict = convert_results_to_plot_dict(res)
        for k, v in res_dict.items():
            others_to_plot_dict[k].append(v)

    target_dict = convert_results_to_plot_dict(target)
    best_dict = convert_results_to_plot_dict(best_run)

    for k in timeseries_keys:
        ax = axes[k]

        others_to_plot_k = others_to_plot_dict[k]
        for other_to_plot in others_to_plot_k:
            get_timeseries(other_to_plot).plot.line(
                ax=ax,
                legend=False,
                linewidth=0.5,
                alpha=0.3,
                color="tab:gray",
                zorder=1.5,
            )

        target_k = target_dict[k]
        get_timeseries(target_k).plot.line(
            ax=ax,
            legend=False,
            linewidth=2,
            alpha=0.8,
            color="tab:blue",
            zorder=2.0,
        )

        best_k = best_dict[k]
        get_timeseries(best_k).plot.line(
            ax=ax,
            legend=False,
            linewidth=2,
            alpha=0.8,
            color="tab:orange",
            zorder=2.5,
        )

        ax.set_ylabel(k)

openscm_calibration.calibration_demo#

CostCalculator #

normalisation instance-attribute #

target instance-attribute #

calculate_cost #

calculate_negative_log_likelihood #

ExperimentResult #

experiment_id instance-attribute #

result instance-attribute #

ExperimentResultCollection #

iteration class-attribute instance-attribute #

results instance-attribute #

lineplot #

to_dict #

to_timeseries #

Timeseries #

time instance-attribute #

values instance-attribute #

add_iteration_info #

convert_results_to_plot_dict #

get_timeseries #

plot_timeseries #

normalisation `instance-attribute` #

target `instance-attribute` #

experiment_id `instance-attribute` #

result `instance-attribute` #

iteration `class-attribute` `instance-attribute` #

results `instance-attribute` #

time `instance-attribute` #

values `instance-attribute` #