# Source code for src.plotting.task_plot_rapid_test_statistics

import matplotlib.pyplot as plt
import pandas as pd
import pytask
import seaborn as sns

from src.config import BLD
from src.config import PLOT_END_DATE
from src.config import PLOT_SIZE
from src.plotting.plotting import BLUE
from src.plotting.plotting import GREEN
from src.plotting.plotting import style_plot
from src.plotting.plotting import YELLOW
from src.simulation.scenario_config import (
    create_path_to_rapid_test_statistic_time_series,
)
from src.simulation.task_process_rapid_test_statistics import CHANNELS
from src.simulation.task_process_rapid_test_statistics import OUTCOMES
from src.simulation.task_process_rapid_test_statistics import SHARE_TYPES

# Rate columns that are plotted in addition to the outcome / share type
# combinations; their y axis label is the name with underscores replaced by
# spaces.
RATES = [
    "true_positive_rate",
    "false_positive_rate",
    "true_negative_rate",
    "false_negative_rate",
]


def _create_rapid_test_plot_parametrization():
    """Build the pytask parametrization for the rapid test statistic plots.

    A spec is created for every plotted column, i.e. every entry of ``RATES``
    and every combination of ``OUTCOMES`` and ``SHARE_TYPES``, once with and
    once without the single runs.

    Returns:
        tuple: ``(signature, parametrization)``. ``signature`` is the comma
            separated string of argument names and ``parametrization`` is the
            list of specs returned by ``_create_spec``.

    Raises:
        KeyError: If an entry of ``SHARE_TYPES`` or ``OUTCOMES`` has no label
            defined in this function.

    """
    signature = "depends_on, plot_single_runs, ylabel, produces"

    label_templates = {
        "number": "number of {nice_outcome} per million",
        "popshare": "share of the population with {nice_outcome}",
        "testshare": "share of {nice_outcome}",
    }
    nice_outcomes = {
        "false_negative": "false negative tests",
        "false_positive": "false positive tests",
        "tested_negative": "negative tests",
        "tested_positive": "positive tests",
        "true_negative": "true negative tests",
        "true_positive": "true positive tests",
        "tested": "tests",
    }

    # The rates come first, followed by every outcome in every share type.
    column_and_label = [(rate, rate.replace("_", " ")) for rate in RATES]
    column_and_label += [
        (
            f"{share_type}_{outcome}",
            label_templates[share_type].format(nice_outcome=nice_outcomes[outcome]),
        )
        for outcome in OUTCOMES
        for share_type in SHARE_TYPES
    ]

    parametrization = [
        _create_spec(
            column=column,
            plot_single_runs=plot_single_runs,
            ylabel=ylabel,
        )
        for column, ylabel in column_and_label
        for plot_single_runs in (True, False)
    ]

    return signature, parametrization


def _create_spec(column, plot_single_runs, ylabel):
    """Create the parametrization entry for one plot.

    Args:
        column (str): Name of the plotted statistic without the channel suffix.
        plot_single_runs (bool): Whether the plot includes the single runs.
            Only affects the file name here.
        ylabel (str): Label of the y axis, passed through unchanged.

    Returns:
        tuple: ``(depends_on, plot_single_runs, ylabel, produces)`` where
            ``depends_on`` maps ``"{column}_by_{channel}"`` for every entry of
            ``CHANNELS`` to the path of its "spring_baseline" time series and
            ``produces`` is the path of the pdf below
            ``BLD / "figures" / "rapid_test_statistics"``.

    """
    channel_columns = [f"{column}_by_{channel}" for channel in CHANNELS]
    depends_on = {
        channel_column: create_path_to_rapid_test_statistic_time_series(
            "spring_baseline", channel_column
        )
        for channel_column in channel_columns
    }

    suffix = "_with_single_runs" if plot_single_runs else ""
    file_name = f"{column}{suffix}.pdf"
    produces = BLD / "figures" / "rapid_test_statistics" / file_name
    return depends_on, plot_single_runs, ylabel, produces


# (signature, list of specs) unpacked into the parametrize decorator of the
# plotting task; built once when the module is imported.
_PARAMETRIZATION = _create_rapid_test_plot_parametrization()


@pytask.mark.parametrize(*_PARAMETRIZATION)
def task_plot_rapid_test_statistics(depends_on, plot_single_runs, ylabel, produces):
    """Plot one rapid test statistic over time with one line per channel.

    Args:
        depends_on (dict): Maps the channel specific column names to the paths
            of the pickled time series.
        plot_single_runs (bool): Passed on to ``_plot_df``. If True, the other
            columns of each loaded DataFrame are drawn as well.
        ylabel (str): Label of the y axis.
        produces (pathlib.Path): Location the figure is saved to.

    """
    # Divisor that scales absolute numbers for Germany to numbers per million.
    population_in_millions = 83

    fig, ax = plt.subplots(figsize=PLOT_SIZE)
    try:
        for col, path in depends_on.items():
            df = pd.read_pickle(path)
            if "number_" in col:
                df = df / population_in_millions

            color, label = _get_channel_color_and_label(col)
            ax = _plot_df(
                df=df.loc["2021-03-15":],
                column=col,
                color=color,
                plot_single_runs=plot_single_runs,
                ax=ax,
                label=label,
            )

        ax.set_xlim(pd.Timestamp("2021-03-01"), pd.Timestamp(PLOT_END_DATE))
        fig, ax = style_plot(fig, ax)
        ax.set_ylabel(ylabel)
        fig.tight_layout()
        fig.savefig(produces)
    finally:
        # Close this figure explicitly (not just whichever one is current) and
        # also when plotting or saving fails, so figures do not pile up across
        # the parametrized tasks.
        plt.close(fig)


def _plot_df(
    df,
    column,
    color,
    label,
    ax,
    plot_single_runs,
):
    """Draw one column of ``df`` and optionally all its other columns.

    Args:
        df (pandas.DataFrame): Data to plot. The index is used as x axis.
        column (str): Column drawn as the thick, labeled main line. It is
            plotted as is, without smoothing.
        color: Color used for all lines drawn by this call.
        label (str): Legend label of the main line.
        ax (matplotlib.axes.Axes): Axes to draw on.
        plot_single_runs (bool): If True, every column other than ``column``
            is drawn as an unlabeled, thinner line after a trailing rolling
            mean over 7 rows.

    Returns:
        matplotlib.axes.Axes: The axes that were passed in.

    """
    main_series = df[column]
    sns.lineplot(
        x=main_series.index,
        y=main_series,
        ax=ax,
        linewidth=4,
        color=color,
        label=label,
        alpha=0.8,
    )

    if not plot_single_runs:
        return ax

    for col in df.columns:
        if col == column:
            continue
        smoothed = df[col].rolling(window=7, min_periods=1, center=False).mean()
        sns.lineplot(
            x=df.index,
            y=smoothed,
            ax=ax,
            linewidth=2.5,
            color=color,
            alpha=0.6,
        )
    return ax


def _get_channel_color_and_label(col):
    """Return the line color and legend label belonging to a column name.

    The channel is detected by substring: "work", "educ" and "private" are
    checked in this order and the first match wins. Columns that contain none
    of them are treated as the aggregate and drawn in black.

    Args:
        col (str): Name of the plotted column.

    Returns:
        tuple: ``(color, label)``.

    """
    if "work" in col:
        return GREEN, "work"
    if "educ" in col:
        return YELLOW, "educ"
    if "private" in col:
        return BLUE, "private"
    return "k", "aggregate"