# Copyright 2022 - 2026 The PyMC Labs Developers
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Functions that generate data sets used in examples
"""
import numpy as np
import pandas as pd
from scipy.stats import gamma
from statsmodels.nonparametric.smoothers_lowess import lowess
# Default smoothing parameters passed to statsmodels LOWESS: `frac` is the
# fraction of the data used for each local regression; `it=0` disables the
# robustifying iterations.
default_lowess_kwargs: dict[str, float | int] = {"frac": 0.2, "it": 0}
# Module-wide default random seed for reproducible examples.
RANDOM_SEED: int = 8927
def _smoothed_gaussian_random_walk(
    gaussian_random_walk_mu: float,
    gaussian_random_walk_sigma: float,
    N: int,
    lowess_kwargs: dict,
    rng: np.random.Generator,
) -> tuple[np.ndarray, np.ndarray]:
    """
    Draw a Gaussian random walk of length ``N`` and smooth it with LOWESS.

    :param gaussian_random_walk_mu:
        Mean of the random walk
    :param gaussian_random_walk_sigma:
        Standard deviation of the random walk
    :param N:
        Length of the random walk
    :param lowess_kwargs:
        Keyword argument dictionary passed to statsmodels lowess
    :param rng:
        NumPy random number generator instance
    """
    t = np.arange(N)
    # Cumulative sum of i.i.d. normal increments forms the random walk.
    steps = rng.normal(gaussian_random_walk_mu, gaussian_random_walk_sigma, N)
    walk = np.cumsum(steps)
    # lowess returns an (N, 2) array of (x, fitted) pairs; keep the fit only.
    smoothed = lowess(walk, t, **lowess_kwargs)[:, 1]
    return (t, smoothed)
def generate_synthetic_control_data(
    N: int = 100,
    treatment_time: int = 70,
    grw_mu: float = 0.25,
    grw_sigma: float = 1,
    lowess_kwargs: dict | None = None,
    seed: int | None = None,
) -> tuple[pd.DataFrame, np.ndarray]:
    """
    Generates data for synthetic control example.

    :param N:
        Number of data points
    :param treatment_time:
        Index where treatment begins in the generated dataframe
    :param grw_mu:
        Mean of Gaussian Random Walk
    :param grw_sigma:
        Standard deviation of Gaussian Random Walk
    :param lowess_kwargs:
        Keyword argument dictionary passed to statsmodels lowess. Defaults to
        ``default_lowess_kwargs`` when ``None``.
    :param seed:
        Random seed for reproducibility

    Example
    --------
    >>> from causalpy.data.simulate_data import generate_synthetic_control_data
    >>> df, weightings_true = generate_synthetic_control_data(
    ...     treatment_time=70, seed=42
    ... )
    """
    # Use None as the default to avoid sharing a mutable dict between calls.
    if lowess_kwargs is None:
        lowess_kwargs = default_lowess_kwargs
    rng = np.random.default_rng(seed)
    # 1. Generate non-treated variables. Columns are created in a fixed order
    # so the RNG draw sequence (and hence the output) is reproducible.
    df = pd.DataFrame(
        {
            name: _smoothed_gaussian_random_walk(
                grw_mu, grw_sigma, N, lowess_kwargs, rng
            )[1]
            for name in ["a", "b", "c", "d", "e", "f", "g"]
        }
    )
    # 2. Generate counterfactual, based on weighted sum of non-treated
    # variables. This is the counterfactual with NO treatment.
    weightings_true = rng.dirichlet(np.ones(7), size=1)
    df["counterfactual"] = np.dot(df.to_numpy(), weightings_true.T)
    # 3. Generate the causal effect (gamma pdf is zero before treatment_time)
    causal_effect = gamma(10).pdf(np.arange(0, N, 1) - treatment_time)
    df["causal effect"] = causal_effect * -50
    # 4. Generate the actually observed data, ie the treated with the causal
    # effect applied
    df["actual"] = df["counterfactual"] + df["causal effect"]
    # 5. apply observation noise to all relevant variables
    for var in ["actual", "a", "b", "c", "d", "e", "f", "g"]:
        df[var] += rng.normal(0, 0.25, N)
    return df, weightings_true
[docs]
def generate_time_series_data_seasonal(
treatment_time: pd.Timestamp,
seed: int | None = None,
) -> pd.DataFrame:
"""
Generates 10 years of monthly data with seasonality
:param treatment_time:
Timestamp of when treatment begins
:param seed:
Random seed for reproducibility
"""
rng = np.random.default_rng(seed)
dates = pd.date_range(
start=pd.to_datetime("2010-01-01"), end=pd.to_datetime("2020-01-01"), freq="ME"
)
df = pd.DataFrame(data={"date": dates})
df = df.assign(
year=lambda x: x["date"].dt.year,
month=lambda x: x["date"].dt.month,
t=df.index,
).set_index("date", drop=True)
month_effect = np.array([11, 13, 12, 15, 19, 23, 21, 28, 20, 17, 15, 12])
df["y"] = 0.2 * df["t"] + 2 * month_effect[np.asarray(df.month.values) - 1]
N = df.shape[0]
idx = np.arange(N)[df.index > treatment_time]
df["causal effect"] = 100 * gamma(10).pdf(
np.array(np.arange(0, N, 1)) - int(np.min(idx))
)
df["y"] += df["causal effect"]
df["y"] += rng.normal(0, 2, N)
# add intercept
df["intercept"] = np.ones(df.shape[0])
return df
[docs]
def generate_time_series_data_simple(
treatment_time: pd.Timestamp,
slope: float = 0.0,
seed: int | None = None,
) -> pd.DataFrame:
"""Generate simple interrupted time series data, with no seasonality or temporal
structure.
:param treatment_time:
Timestamp of when treatment begins
:param slope:
Slope of the linear trend
:param seed:
Random seed for reproducibility
"""
rng = np.random.default_rng(seed)
dates = pd.date_range(
start=pd.to_datetime("2010-01-01"), end=pd.to_datetime("2020-01-01"), freq="ME"
)
df = pd.DataFrame(data={"date": dates})
df = df.assign(
linear_trend=df.index,
).set_index("date", drop=True)
df["timeseries"] = slope * df["linear_trend"]
N = df.shape[0]
df["causal effect"] = (df.index > treatment_time) * 2
df["timeseries"] += df["causal effect"]
# add intercept
df["intercept"] = np.ones(df.shape[0])
# add observation noise
df["timeseries"] += rng.normal(0, 0.25, N)
return df
[docs]
def generate_did(seed: int | None = None) -> pd.DataFrame:
"""
Generate Difference in Differences data
:param seed:
Random seed for reproducibility
Example
--------
>>> from causalpy.data.simulate_data import generate_did
>>> df = generate_did(seed=42)
"""
rng = np.random.default_rng(seed)
# true parameters
control_intercept = 1
treat_intercept_delta = 0.25
trend = 1
Δ = 0.5
intervention_time = 0.5
# local functions
def outcome(
t: np.ndarray,
control_intercept: float,
treat_intercept_delta: float,
trend: float,
Δ: float,
group: np.ndarray,
post_treatment: np.ndarray,
) -> np.ndarray:
"""Compute the outcome of each unit"""
return (
control_intercept
+ (treat_intercept_delta * group)
+ (t * trend)
+ (Δ * post_treatment * group)
)
df = pd.DataFrame(
{
"group": [0, 0, 1, 1] * 10,
"t": [0.0, 1.0, 0.0, 1.0] * 10,
"unit": np.concatenate([[i] * 2 for i in range(20)]),
}
)
df["post_treatment"] = df["t"] > intervention_time
df["y"] = outcome(
np.asarray(df["t"]),
control_intercept,
treat_intercept_delta,
trend,
Δ,
np.asarray(df["group"]),
np.asarray(df["post_treatment"]),
)
df["y"] += rng.normal(0, 0.1, df.shape[0])
return df
[docs]
def generate_regression_discontinuity_data(
N: int = 100,
true_causal_impact: float = 0.5,
true_treatment_threshold: float = 0.0,
seed: int | None = None,
) -> pd.DataFrame:
"""
Generate regression discontinuity example data
:param seed:
Random seed for reproducibility
Example
--------
>>> import pathlib
>>> from causalpy.data.simulate_data import generate_regression_discontinuity_data
>>> df = generate_regression_discontinuity_data(
... true_treatment_threshold=0.5, seed=42
... )
"""
rng = np.random.default_rng(seed)
def is_treated(x: np.ndarray) -> np.ndarray:
"""Check if x was treated"""
return np.greater_equal(x, true_treatment_threshold)
def impact(x: np.ndarray) -> np.ndarray:
"""Assign true_causal_impact to all treated entries"""
y = np.zeros(len(x))
y[is_treated(x)] = true_causal_impact
return y
x = np.sort(rng.uniform(-1, 1, size=N))
y = np.sin(x * 3) + impact(x) + rng.normal(0, 0.1, size=N)
return pd.DataFrame({"x": x, "y": y, "treated": is_treated(x)})
[docs]
def generate_ancova_data(
N: int = 200,
pre_treatment_means: np.ndarray | None = None,
treatment_effect: int = 2,
sigma: int = 1,
seed: int | None = None,
) -> pd.DataFrame:
"""
Generate ANCOVA example data
:param seed:
Random seed for reproducibility
Example
--------
>>> import pathlib
>>> from causalpy.data.simulate_data import generate_ancova_data
>>> df = generate_ancova_data(
... N=200,
... pre_treatment_means=np.array([10, 12]),
... treatment_effect=2,
... sigma=1,
... seed=42,
... )
"""
rng = np.random.default_rng(seed)
if pre_treatment_means is None:
pre_treatment_means = np.array([10, 12])
group = rng.choice(2, size=N)
pre = rng.normal(loc=pre_treatment_means[group])
post = pre + treatment_effect * group + rng.normal(size=N) * sigma
df = pd.DataFrame({"group": group, "pre": pre, "post": post})
return df
def generate_geolift_data(seed: int | None = None) -> pd.DataFrame:
    """Generate synthetic geolift data using a latent factor model.

    Each unit's time series is a linear combination of K=3 shared seasonal
    factors (GP draws) with unit-specific loadings, plus observation noise.
    Most countries share positive loadings and are therefore positively
    correlated, while 2 "contrarian" countries carry a negative loading on
    one factor, making them negatively correlated with the majority. The
    treated unit (Denmark) is a Dirichlet-weighted combination of the
    positively-loaded countries only, so it is well-reconstructed by good
    donors but poorly correlated with the contrarian ones.

    This mirrors the latent factor DGP used to motivate synthetic control
    methods in Abadie (2010, 2021).

    :param seed:
        Random seed for reproducibility
    """
    rng = np.random.default_rng(seed)
    n_years = 4
    treatment_time = pd.to_datetime("2022-01-01")
    causal_impact = 0.2
    time = pd.date_range(start="2019-01-01", periods=52 * n_years, freq="W")
    n_obs = len(time)
    K = 3
    # Shared latent factors: one seasonal GP draw per factor, tiled over years
    factors = np.column_stack(
        [
            _create_series(
                n=52,
                amplitude=1,
                length_scale=2,
                n_years=n_years,
                intercept=0,
                rng=rng,
            )
            for _ in range(K)
        ]
    )  # (n_obs, K)
    similar = [
        "Austria",
        "Belgium",
        "Bulgaria",
        "Croatia",
        "Cyprus",
        "Czech_Republic",
        "Estonia",
        "Finland",
    ]
    contrarian = ["Greece", "Hungary"]
    untreated = similar + contrarian
    # Positive loadings for similar countries, one negative loading for contrarians
    loadings: dict[str, np.ndarray] = {}
    for country in similar:
        loadings[country] = rng.uniform(0.3, 1.0, size=K)
    loadings["Greece"] = np.array([-0.6, -0.3, 0.8])
    loadings["Hungary"] = np.array([0.3, -0.7, -0.5])
    df = pd.DataFrame(index=time)
    df.index.name = "time"
    for country in untreated:
        df[country] = factors @ loadings[country] + 3 + rng.normal(0, 0.1, size=n_obs)
    # Denmark as a weighted sum of similar countries only
    w = rng.dirichlet(np.ones(len(similar)))
    df["Denmark"] = df[similar].values @ w + rng.normal(0, 0.1, size=n_obs)
    # treatment effect: constant uplift from treatment_time onwards
    df["Denmark"] += np.where(df.index < treatment_time, 0, causal_impact)
    # ensure we never see any negative values
    df = df.clip(lower=0)
    return df
def generate_multicell_geolift_data(
    seed: int | None = None,
) -> pd.DataFrame:
    """Generate synthetic data for a multi-cell geolift example.

    The data consists of 12 untreated geos (``u1``-``u12``) and 4 treated
    geos (``t1``-``t4``). Each treated geo is a Dirichlet-weighted
    combination of the untreated geos, with a constant treatment effect
    applied after the ``treatment_time``. The timeseries data is observed at
    weekly resolution and has annual seasonality, with this seasonality being
    drawn from a Gaussian Process with a periodic kernel.

    :param seed:
        Random seed for reproducibility
    """
    rng = np.random.default_rng(seed)
    n_years = 4
    treatment_time = pd.to_datetime("2022-01-01")
    causal_impact = 0.2
    time = pd.date_range(start="2019-01-01", periods=52 * n_years, freq="W")
    untreated = [f"u{i}" for i in range(1, 13)]
    df = (
        pd.DataFrame(
            {
                country: _create_series(
                    n=52,
                    amplitude=1,
                    length_scale=2,
                    n_years=n_years,
                    intercept=3,
                    rng=rng,
                )
                for country in untreated
            }
        )
        .assign(time=time)
        .set_index("time")
    )
    treated = ["t1", "t2", "t3", "t4"]
    for treated_geo in treated:
        # create treated unit as a weighted sum of the untreated units
        weights = rng.dirichlet(np.ones(len(untreated)))
        df[treated_geo] = np.dot(df[untreated].values, weights)
        # add treatment effect
        df[treated_geo] += np.where(df.index < treatment_time, 0, causal_impact)
    # add observation noise to all geos
    for col in untreated + treated:
        df[col] += rng.normal(size=len(df), scale=0.1)
    # ensure we never see any negative sales
    df = df.clip(lower=0)
    return df
# -----------------
# UTILITY FUNCTIONS
# -----------------
def _generate_seasonality(
    n: int,
    amplitude: int,
    length_scale: float,
    rng: np.random.Generator,
) -> np.ndarray:
    """Draw one seasonal pattern of length ``n`` from a Gaussian process
    whose covariance is given by ``_periodic_kernel``.

    :param rng:
        NumPy random number generator instance
    """
    # Evaluate the kernel on an evenly spaced grid over [0, 1] to build the
    # full covariance matrix of the GP.
    grid = np.linspace(0, 1, n)
    xa, xb = np.meshgrid(grid, grid)
    covariance = _periodic_kernel(
        xa, xb, period=1, length_scale=length_scale, amplitude=amplitude
    )
    # A single zero-mean multivariate normal draw is one GP sample path.
    return rng.multivariate_normal(np.zeros(n), covariance)
def _periodic_kernel(
x1: np.ndarray,
x2: np.ndarray,
period: int = 1,
length_scale: float = 1.0,
amplitude: int = 1,
) -> np.ndarray:
"""Generate a periodic kernel for gaussian process"""
return amplitude**2 * np.exp(
-2 * np.sin(np.pi * np.abs(x1 - x2) / period) ** 2 / length_scale**2
)
def _create_series(
    n: int,
    amplitude: int,
    length_scale: int,
    n_years: int,
    intercept: int,
    rng: np.random.Generator,
) -> np.ndarray:
    """
    Repeat one GP-drawn seasonal pattern (shifted by ``intercept``) across
    ``n_years`` years.

    :param rng:
        NumPy random number generator instance
    """
    one_year = _generate_seasonality(
        n=n, amplitude=amplitude, length_scale=length_scale, rng=rng
    )
    # The identical annual pattern is tiled end-to-end for every year.
    return np.tile(one_year + intercept, n_years)
[docs]
def generate_staggered_did_data(
n_units: int = 50,
n_time_periods: int = 20,
treatment_cohorts: dict[int, int] | None = None,
treatment_effects: dict[int, float] | None = None,
unit_fe_scale: float = 2.0,
time_fe_scale: float = 1.0,
sigma: float = 0.5,
seed: int | None = None,
) -> pd.DataFrame:
"""
Generate synthetic panel data with staggered treatment adoption.
Creates a balanced panel dataset where different cohorts of units receive
treatment at different times. Supports dynamic treatment effects that vary
by event-time (time relative to treatment).
Parameters
----------
n_units : int, default=50
Total number of units in the panel.
n_time_periods : int, default=20
Number of time periods in the panel.
treatment_cohorts : dict[int, int], optional
Dictionary mapping treatment time to number of units in that cohort.
Units not assigned to any cohort are never-treated.
Default: {5: 10, 10: 10, 15: 10} (3 cohorts of 10 units each,
leaving 20 never-treated units).
treatment_effects : dict[int, float], optional
Dictionary mapping event-time (t - G) to treatment effect.
Event-time 0 is the first treated period.
Default: {0: 1.0, 1: 1.5, 2: 2.0, 3: 2.5} with constant effect
of 2.5 for all subsequent periods.
unit_fe_scale : float, default=2.0
Scale of unit fixed effects (drawn from Normal(0, unit_fe_scale)).
time_fe_scale : float, default=1.0
Scale of time fixed effects (drawn from Normal(0, time_fe_scale)).
sigma : float, default=0.5
Standard deviation of idiosyncratic noise.
seed : int, optional
Random seed for reproducibility.
Returns
-------
pd.DataFrame
Panel data with columns:
- unit: Unit identifier
- time: Time period
- treated: Binary indicator (1 if treated at time t, 0 otherwise)
- treatment_time: Time of treatment adoption (np.inf for never-treated)
- y: Observed outcome
- y0: Counterfactual outcome (for validation)
- tau: True treatment effect (for validation)
Examples
--------
>>> from causalpy.data.simulate_data import generate_staggered_did_data
>>> df = generate_staggered_did_data(n_units=30, n_time_periods=15, seed=42)
>>> df.head()
unit time treated treatment_time ...
Notes
-----
The data generating process is:
.. math::
Y_{it} = \\alpha_i + \\lambda_t + \\tau_{it} \\cdot D_{it} + \\varepsilon_{it}
where :math:`\\alpha_i` is the unit fixed effect, :math:`\\lambda_t` is the
time fixed effect, :math:`D_{it}` is the treatment indicator, and
:math:`\\tau_{it}` is the dynamic treatment effect that depends on
event-time :math:`e = t - G_i`.
"""
rng = np.random.default_rng(seed)
# Default treatment cohorts: 3 cohorts at times 5, 10, 15
if treatment_cohorts is None:
treatment_cohorts = {5: 10, 10: 10, 15: 10}
# Default dynamic treatment effects: ramp up then stabilize
if treatment_effects is None:
treatment_effects = {0: 1.0, 1: 1.5, 2: 2.0, 3: 2.5}
# Validate cohort assignments don't exceed n_units
total_treated = sum(treatment_cohorts.values())
if total_treated > n_units:
raise ValueError(
f"Total units in treatment cohorts ({total_treated}) "
f"exceeds n_units ({n_units})"
)
# Generate unit fixed effects
unit_fe = rng.normal(0, unit_fe_scale, n_units)
# Generate time fixed effects
time_fe = rng.normal(0, time_fe_scale, n_time_periods)
# Assign treatment times to units
treatment_times = np.full(n_units, np.inf) # Default: never treated
unit_idx = 0
for g, n_cohort in treatment_cohorts.items():
treatment_times[unit_idx : unit_idx + n_cohort] = g
unit_idx += n_cohort
# Shuffle treatment assignments
rng.shuffle(treatment_times)
# Build panel data
rows = []
for i in range(n_units):
for t in range(n_time_periods):
g_i = treatment_times[i]
is_treated = t >= g_i
# Counterfactual outcome (no treatment)
y0 = unit_fe[i] + time_fe[t]
# Treatment effect based on event-time
if is_treated:
event_time = int(t - g_i)
# Use specified effect or last available effect for later periods
if event_time in treatment_effects:
tau = treatment_effects[event_time]
else:
# Use the effect for the maximum specified event-time
max_event_time = max(treatment_effects.keys())
tau = treatment_effects[max_event_time]
else:
tau = 0.0
# Add noise
epsilon = rng.normal(0, sigma)
# Observed outcome
y = y0 + tau + epsilon
rows.append(
{
"unit": i,
"time": t,
"treated": int(is_treated),
"treatment_time": g_i,
"y": y,
"y0": y0,
"tau": tau,
}
)
df = pd.DataFrame(rows)
return df
[docs]
def generate_piecewise_its_data(
N: int = 100,
interruption_times: list[int] | None = None,
baseline_intercept: float = 10.0,
baseline_slope: float = 0.1,
level_changes: list[float] | None = None,
slope_changes: list[float] | None = None,
noise_sigma: float = 1.0,
seed: int | None = None,
) -> tuple[pd.DataFrame, dict]:
"""
Generate piecewise Interrupted Time Series data with known ground truth parameters.
This function creates synthetic data for testing and demonstrating piecewise ITS
/ segmented regression models. The data follows the model:
y_t = β₀ + β₁t + Σₖ(level_k · I_k(t) + slope_k · R_k(t)) + ε_t
Where:
- I_k(t) = 1 if t >= T_k else 0 (step function for level change)
- R_k(t) = max(0, t - T_k) (ramp function for slope change)
Parameters
----------
N : int, default=100
Number of time points in the series.
interruption_times : list[int], optional
List of time indices where interruptions occur. Defaults to [50].
baseline_intercept : float, default=10.0
The intercept (β₀) of the baseline trend.
baseline_slope : float, default=0.1
The slope (β₁) of the baseline trend.
level_changes : list[float], optional
List of level changes at each interruption. Length must match
interruption_times. If None, defaults to [5.0] for single interruption.
slope_changes : list[float], optional
List of slope changes at each interruption. Length must match
interruption_times. If None, defaults to [0.0] (no slope change).
noise_sigma : float, default=1.0
Standard deviation of the Gaussian noise.
seed : int, optional
Random seed for reproducibility.
Returns
-------
df : pd.DataFrame
DataFrame with columns:
- 't': time index (0 to N-1)
- 'y': observed outcome with noise
- 'y_true': outcome without noise (ground truth)
- 'counterfactual': baseline trend without intervention effects
- 'effect': true causal effect at each time point
params : dict
Dictionary containing the true parameters:
- 'baseline_intercept': β₀
- 'baseline_slope': β₁
- 'level_changes': list of level changes
- 'slope_changes': list of slope changes
- 'interruption_times': list of interruption times
- 'noise_sigma': noise standard deviation
Examples
--------
>>> from causalpy.data.simulate_data import generate_piecewise_its_data
>>> # Single interruption with level and slope change
>>> df, params = generate_piecewise_its_data(
... N=100,
... interruption_times=[50],
... level_changes=[5.0],
... slope_changes=[0.2],
... seed=42,
... )
>>> df.shape
(100, 5)
>>> # Multiple interruptions
>>> df, params = generate_piecewise_its_data(
... N=150,
... interruption_times=[50, 100],
... level_changes=[3.0, -2.0],
... slope_changes=[0.1, -0.15],
... seed=42,
... )
>>> len(params["interruption_times"])
2
>>> # Level change only (no slope change)
>>> df, params = generate_piecewise_its_data(
... N=100,
... interruption_times=[50],
... level_changes=[5.0],
... slope_changes=[0.0],
... seed=42,
... )
"""
# Set defaults
if interruption_times is None:
interruption_times = [50]
n_interruptions = len(interruption_times)
if level_changes is None:
level_changes = [5.0] * n_interruptions
if slope_changes is None:
slope_changes = [0.0] * n_interruptions
# Validate inputs
if len(level_changes) != n_interruptions:
raise ValueError(
f"level_changes length ({len(level_changes)}) must match "
f"interruption_times length ({n_interruptions})"
)
if len(slope_changes) != n_interruptions:
raise ValueError(
f"slope_changes length ({len(slope_changes)}) must match "
f"interruption_times length ({n_interruptions})"
)
for t_k in interruption_times:
if t_k < 0 or t_k >= N:
raise ValueError(
f"Interruption time {t_k} is outside valid range [0, {N - 1}]"
)
rng = np.random.default_rng(seed)
# Generate time index
t = np.arange(N)
# Compute baseline (counterfactual)
counterfactual = baseline_intercept + baseline_slope * t
# Compute intervention effects
effect = np.zeros(N)
for k, t_k in enumerate(interruption_times):
# Step function: I_k(t) = 1 if t >= t_k
step = (t >= t_k).astype(float)
# Ramp function: R_k(t) = max(0, t - t_k)
ramp = np.maximum(0, t - t_k).astype(float)
effect += level_changes[k] * step + slope_changes[k] * ramp
# Compute true outcome (without noise)
y_true = counterfactual + effect
# Add noise
noise = rng.normal(0, noise_sigma, N)
y = y_true + noise
# Create DataFrame
df = pd.DataFrame(
{
"t": t,
"y": y,
"y_true": y_true,
"counterfactual": counterfactual,
"effect": effect,
}
)
# Store parameters
params = {
"baseline_intercept": baseline_intercept,
"baseline_slope": baseline_slope,
"level_changes": level_changes,
"slope_changes": slope_changes,
"interruption_times": interruption_times,
"noise_sigma": noise_sigma,
}
return df, params