Source code for src.plotting.task_plot_incidences_by_group

"""Plot the group-wise incidences of the combined baseline scenario.

One plot is created per grouped outcome, e.g. by age group or by federal state.
"""
import matplotlib.pyplot as plt
import pandas as pd
import pytask

from src.calculate_moments import smoothed_outcome_per_hundred_thousand_rki
from src.config import BLD
from src.config import SRC
from src.plotting.plotting import make_nice_outcome
from src.plotting.plotting import OUTCOME_TO_Y_LABEL
from src.plotting.plotting import plot_group_time_series
from src.simulation.load_simulation_inputs import create_period_outputs
from src.simulation.scenario_config import create_path_to_group_incidence_plot
from src.simulation.scenario_config import (
    create_path_to_scenario_outcome_time_series,
)


# Source modules registered as dependencies of the plotting task through
# ``pytask.mark.depends_on``. The keys are only labels for the paths.
_DEPENDENCIES = {
    "calculate_moments.py": SRC / "calculate_moments.py",
    "plotting.py": SRC / "plotting" / "plotting.py",
    "scenario_config.py": SRC / "simulation" / "scenario_config.py",
    "load_simulation_inputs": SRC / "simulation" / "load_simulation_inputs.py",
}
def create_parametrization():
    """Build the pytask parametrization for the group incidence plots.

    Every period output whose name contains ``"_by_"`` and does not contain
    ``"currently_infected"`` yields one parametrization entry. The part
    before ``"_by_"`` is the outcome, the part after it the grouping variable.

    Returns:
        tuple: The signature string ``"depends_on, produces, outcome, groupby"``
        and a list of ``(depends_on, produces, outcome, groupby)`` tuples.

    """
    population_dir = BLD / "data" / "population_structure"
    rki_outcomes = ["new_known_case", "newly_deceased"]

    task_args = []
    for entry in create_period_outputs():
        # Only grouped outcomes are plotted; currently infected is skipped.
        if "_by_" not in entry or "currently_infected" in entry:
            continue

        outcome, groupby = entry.split("_by_")

        simulated_path = create_path_to_scenario_outcome_time_series(
            scenario_name="combined_baseline", entry=entry
        )
        depends_on = {
            "simulated": simulated_path,
            "group_sizes_age_groups": population_dir / "age_groups_rki.pkl",
            "group_sizes_states": population_dir / "federal_states.parquet",
        }
        # The RKI series is only attached for these two outcomes.
        if outcome in rki_outcomes:
            depends_on["rki"] = BLD / "data" / "processed_time_series" / "rki.pkl"

        produces = create_path_to_group_incidence_plot(
            name="combined_baseline", outcome=outcome, groupby=groupby
        )
        task_args.append((depends_on, produces, outcome, groupby))

    return "depends_on, produces, outcome, groupby", task_args
_SIGNATURE, _PARAMETRIZATION = create_parametrization()


@pytask.mark.depends_on(_DEPENDENCIES)
@pytask.mark.parametrize(_SIGNATURE, _PARAMETRIZATION)
def task_plot_age_group_incidences_in_one_scenario(
    depends_on, produces, outcome, groupby
):
    """Plot the simulated time series of one outcome for every group.

    If the dependencies contain an ``"rki"`` entry, the smoothed RKI series of
    the matching outcome is computed per group and passed to the plot as well.

    Args:
        depends_on (dict): Paths of the inputs. ``"simulated"`` is always
            read; ``"rki"`` and the two ``"group_sizes_*"`` entries are only
            read when ``"rki"`` is present.
        produces (pathlib.Path): Path the figure is saved to.
        outcome (str): Name of the plotted outcome.
        groupby (str): Name of the grouping variable. With RKI data only
            ``"age_group_rki"`` and ``"state"`` are supported.

    Raises:
        ValueError: If RKI data is requested for an unsupported ``groupby``.

    """
    incidences = pd.read_pickle(depends_on["simulated"])

    if "rki" in depends_on:
        if groupby == "age_group_rki":
            group_sizes = pd.read_pickle(depends_on["group_sizes_age_groups"])["n"]
        elif groupby == "state":
            state_info = pd.read_parquet(depends_on["group_sizes_states"])
            group_sizes = state_info.set_index("name")["population"]
        else:
            # Without this branch ``group_sizes`` would be unbound below and
            # the task would fail with an opaque UnboundLocalError.
            raise ValueError(
                "No group sizes are available for groupby "
                f"{groupby!r}; expected 'age_group_rki' or 'state'."
            )

        rki_data = pd.read_pickle(depends_on["rki"])
        # The RKI outcome is derived from the name of the produced file.
        rki_outcome = (
            "newly_infected" if "new_known_case" in produces.name else "newly_deceased"
        )
        # NOTE(review): the factor 10 presumably rescales the per hundred
        # thousand series to the unit of the simulated series -- confirm.
        rki = (
            smoothed_outcome_per_hundred_thousand_rki(
                df=rki_data,
                outcome=rki_outcome,
                groupby=groupby,
                group_sizes=group_sizes,
                take_logs=False,
            )
            * 10
        )
    else:
        rki = None

    nice_outcome = make_nice_outcome(outcome)
    # "{group}" is deliberately left as a literal placeholder in the title.
    title = f"{nice_outcome} in " + "{group}"
    ylabel = _get_ylabel(outcome)

    fig, _ax = plot_group_time_series(
        df=incidences,
        title=title,
        rki=rki,
        ylabel=ylabel,
    )
    try:
        fig.savefig(produces)
    finally:
        # Close exactly this figure, even if saving fails, so figures do not
        # pile up across the parametrized tasks.
        plt.close(fig)
def _get_ylabel(outcome):
    """Return the y-axis label of an outcome, wrapped onto several lines.

    The label is looked up in ``OUTCOME_TO_Y_LABEL``. Labels longer than 45
    characters are split into three lines, labels longer than 24 characters
    into two lines, and shorter labels are returned unchanged. The split is
    done by word count, not by character count.

    Args:
        outcome (str): Key into ``OUTCOME_TO_Y_LABEL``.

    Returns:
        str: The label with ``"\\n"`` between the lines.

    Raises:
        KeyError: If ``outcome`` is not a key of ``OUTCOME_TO_Y_LABEL``.

    """
    ylabel = OUTCOME_TO_Y_LABEL[outcome]

    if len(ylabel) > 45:
        n_lines = 3
    elif len(ylabel) > 24:
        n_lines = 2
    else:
        return ylabel

    words = ylabel.split()
    # Each line but the last gets ``words_per_line`` words; the last line
    # takes whatever remains.
    words_per_line = len(words) // n_lines
    bounds = [i * words_per_line for i in range(n_lines)] + [len(words)]
    lines = [
        " ".join(words[start:stop]) for start, stop in zip(bounds, bounds[1:])
    ]
    # With fewer words than lines some chunks are empty; dropping them avoids
    # blank lines at the start of the label.
    return "\n".join(line for line in lines if line)