Source code for src.create_initial_states.create_initial_conditions

import itertools as it
import warnings

import pandas as pd

from src.config import POPULATION_GERMANY
from src.create_initial_states.create_initial_immunity import create_initial_immunity
from src.create_initial_states.create_initial_infections import (
    create_initial_infections,
)


def create_initial_conditions(
    start,
    end,
    seed,
    virus_shares,
    reporting_delay,
    synthetic_data,
    empirical_infections,
    population_size=POPULATION_GERMANY,
    overall_share_known_cases=None,
    group_share_known_cases=None,
    group_weights=None,
):
    """Build the initial infections and the initial immunity for a simulation.

    The empirical infections are first scaled up by the share of known cases.
    The upscaled series is then handed to ``create_initial_infections`` and
    ``create_initial_immunity``.

    Args:
        start (str or pd.Timestamp): Start date for collection of initial
            infections.
        end (str or pd.Timestamp): End date for collection of initial
            infections; also passed as ``date`` to the immunity step.
        seed (int): Base seed. The initial infections are created with
            ``seed`` and the initial immunity with ``seed + 1``.
        virus_shares (dict): Keys are the names of the virus strains. Values
            are pandas.Series with a DatetimeIndex and the share among newly
            infected individuals on each day as value.
        reporting_delay (int): Number of days by which the reporting of cases
            is delayed. If given, later days are used to get the infections of
            the demanded time frame.
        synthetic_data (pandas.DataFrame): The synthetic population data set.
            Needs to contain 'county' and 'age_group_rki' as columns.
        empirical_infections (pandas.DataFrame): The index must contain
            'date', 'county' and 'age_group_rki'.
        population_size (int): Forwarded unchanged to both creation steps.
            Defaults to ``POPULATION_GERMANY``.
        overall_share_known_cases (pd.Series): Series with date index that
            contains the aggregated share of known cases over time.
        group_share_known_cases (pandas.Series): Series with age_groups in the
            index. The values are interpreted as share of known cases for each
            age group.
        group_weights (pandas.Series): Series with sizes or weights of age
            groups.

    Returns:
        dict: Has the keys "initial_infections" and "initial_immunity".

    """
    # Consecutive seeds: the first goes to the infections, the second to the
    # immunity.
    seed_counter = it.count(seed)
    infections_seed = next(seed_counter)
    immunity_seed = next(seed_counter)

    upscaled = _scale_up_empirical_new_infections(
        empirical_infections=empirical_infections,
        overall_share_known_cases=overall_share_known_cases,
        group_share_known_cases=group_share_known_cases,
        group_weights=group_weights,
    )

    # Keyword arguments that both creation steps receive unchanged.
    shared_kwargs = {
        "empirical_infections": upscaled,
        "synthetic_data": synthetic_data,
        "reporting_delay": reporting_delay,
        "population_size": population_size,
    }

    infections = create_initial_infections(
        start=start,
        end=end,
        seed=infections_seed,
        virus_shares=virus_shares,
        **shared_kwargs,
    )
    immunity = create_initial_immunity(
        date=end,
        initial_infections=infections,
        seed=immunity_seed,
        **shared_kwargs,
    )

    # virus shares are already inside the initial infections so not included
    # here.
    return {
        "initial_infections": infections,
        "initial_immunity": immunity,
    }
def _scale_up_empirical_new_infections(
    empirical_infections,
    group_share_known_cases=None,
    group_weights=None,
    overall_share_known_cases=None,
):
    """Scale up empirical infections with share of known cases.

    Args:
        empirical_infections (pandas.DataFrame): Must have the index levels
            date, county and age_group_rki and contain the column
            "newly_infected".
        group_share_known_cases (pandas.Series): Series with age_groups in the
            index. The values are interpreted as share of known cases for each
            age group.
        group_weights (pandas.Series): Series with sizes or weights of age
            groups. Required if group_share_known_cases is given. If omitted,
            uniform weights over the age groups found in empirical_infections
            are used.
        overall_share_known_cases (pd.Series): Series with date index that
            contains the aggregated share of known cases over time.

    Returns:
        pandas.Series: The upscaled new infections, named
            "upscaled_newly_infected" and indexed by date, county and
            age_group_rki.

    Raises:
        ValueError: If group_share_known_cases is given without group_weights.

    """
    # Explicit check instead of ``assert`` so it survives ``python -O``.
    if group_share_known_cases is not None and group_weights is None:
        raise ValueError(
            "group_weights must be given if group_share_known_cases is given."
        )

    dates = empirical_infections.index.get_level_values("date").unique()
    date_range = pd.date_range(dates.min(), dates.max(), name="date")

    if group_weights is None:
        # Without group specific shares only the group labels are needed, so
        # take them from the data. The index keeps the name "age_group_rki",
        # which the merge below relies on.
        age_groups = empirical_infections.index.get_level_values(
            "age_group_rki"
        ).unique()
        group_weights = pd.Series(1.0, index=age_groups)
    group_weights = group_weights / group_weights.sum()

    if overall_share_known_cases is not None:
        # Cover every day of the range: fill gaps from the next known value
        # first, then carry the last known value forward.
        overall_share_known_cases = (
            overall_share_known_cases.reindex(date_range).bfill().ffill()
        )

    group_share_known_cases_df = create_group_specific_share_known_cases(
        group_share_known_cases=group_share_known_cases,
        group_weights=group_weights,
        overall_share_known_cases=overall_share_known_cases,
        date_range=date_range,
    )
    stacked_group_share_known_cases = group_share_known_cases_df.stack()
    stacked_group_share_known_cases.name = "group_share_known_cases"

    if (stacked_group_share_known_cases > 0.95).any():
        stacked_group_share_known_cases = stacked_group_share_known_cases.clip(0, 0.95)
        warnings.warn(
            "The group specific share known cases is > 0.95 for some date and group. "
            "If this happened with debug states you can simply ignore it. If it "
            "happened with full states, you should investigate it. The group's share "
            "known cases has been clipped to 0.95.",
            UserWarning,
        )
    if (stacked_group_share_known_cases < 0.05).any():
        stacked_group_share_known_cases = stacked_group_share_known_cases.clip(0.05, 1)
        warnings.warn(
            "The group specific share known cases is < 0.05 for some date and group. "
            "If this happened with debug states you can simply ignore it. If it "
            "happened with full states, you should investigate it. The group's share "
            "known cases has been clipped to 0.05.",
            UserWarning,
        )

    # NOTE(review): pd.merge defaults to an inner join, so rows whose
    # (date, age_group_rki) has no share are dropped from the result.
    merged = pd.merge(
        empirical_infections.reset_index(),
        right=stacked_group_share_known_cases.reset_index(),
        on=["date", "age_group_rki"],
    )
    merged["upscaled_newly_infected"] = (
        merged["newly_infected"] / merged["group_share_known_cases"]
    )
    merged = merged.set_index(["date", "county", "age_group_rki"])
    return merged["upscaled_newly_infected"]
def create_group_specific_share_known_cases(
    group_share_known_cases,
    group_weights,
    overall_share_known_cases,
    date_range,
):
    """Create the group specific share known cases.

    Args:
        group_share_known_cases (pandas.Series): Series with age_groups in the
            index. The values are interpreted as share of known cases for each
            age group. May be None if overall_share_known_cases is given.
        group_weights (pandas.Series): Series with sizes or weights of age
            groups. The values are normalized internally, so they need not sum
            to one. Only its index is used if just the overall share is given.
            It is not used at all if just the group shares are given.
        overall_share_known_cases (pd.Series): Series with date index that
            contains the aggregated share of known cases over time. May be
            None if group_share_known_cases is given.
        date_range (pandas.DatetimeIndex): Dates used as index of the result
            whenever group_share_known_cases is given.

    Returns:
        pandas.DataFrame: The index are the dates, the columns are the group
            labels. The value is the share known cases of the particular group
            on the particular date. If only the overall share is given, the
            index is that of overall_share_known_cases.

    Raises:
        ValueError: If neither group_share_known_cases nor
            overall_share_known_cases is given.

    """
    # None given
    if group_share_known_cases is None and overall_share_known_cases is None:
        raise ValueError("Either group or overall share_known_cases must be given.")

    # only overall given: every group gets the overall share.
    if group_share_known_cases is None:
        age_groups = group_weights.index
        group_share_known_cases_df = pd.concat(
            [overall_share_known_cases] * len(age_groups), axis=1
        )
        group_share_known_cases_df.columns = age_groups
        return group_share_known_cases_df

    # group given: start with the same group shares on every date.
    group_share_known_cases_df = pd.DataFrame(
        data=[group_share_known_cases] * len(date_range), index=date_range
    )

    # both given: rescale so that the weighted mean over the groups equals the
    # overall share on every date.
    if overall_share_known_cases is not None:
        # Normalize so that raw group sizes give a proper weighted mean.
        normalized_weights = group_weights / group_weights.sum()
        implied_overall = group_share_known_cases @ normalized_weights
        scaling_factor = overall_share_known_cases / implied_overall
        for col in group_share_known_cases_df:
            group_share_known_cases_df[col] = (
                group_share_known_cases_df[col] * scaling_factor
            )

    return group_share_known_cases_df