Source code for src.create_initial_states.create_initial_immunity

import numpy as np
import pandas as pd
from sid.shared import boolean_choices


def create_initial_immunity(
    empirical_infections,
    synthetic_data,
    initial_infections,
    date,
    seed,
    reporting_delay,
    population_size,
):
    """Create a Series with initial immunity.

    Individuals that are infected in ``initial_infections`` are always immune.
    Every other individual is drawn as immune with the exogenous immunity
    probability of their (age group, county) cell, i.e. the probability of
    being immune conditional on not being endogenously immune.

    Args:
        empirical_infections (pandas.Series): Newly infected Series with the
            index levels ["date", "county", "age_group_rki"]. These must
            already be corrected to include undetected cases. The index does
            not need to be sorted.
        synthetic_data (pandas.DataFrame): Dataset with one row per simulated
            individual. Must contain the columns age_group_rki and county.
        initial_infections (pandas.DataFrame): DataFrame with same index as
            synthetic_data and one column for each day until *date*. Dtype is
            boolean. It is assumed that these already include undetected
            cases.
        date (str or pandas.Timestamp): Anything ``pandas.Timestamp`` accepts.
            Empirical infections up to this date plus ``reporting_delay``
            days are counted towards immunity.
        seed (int): Passed to ``numpy.random.seed`` before the random draws.
        reporting_delay (int): Number of days by which the reporting of cases
            is delayed. If given, later days are used to get the infections of
            the demanded time frame.
        population_size (int): Size of the population behind the
            empirical_infections.

    Returns:
        pd.Series: Boolean series with same index as synthetic_data.

    Raises:
        AssertionError: If a column of ``initial_infections`` lies after
            *date* plus the reporting delay, if the index levels of
            ``empirical_infections`` are not as documented, or if its index
            contains duplicates.

    """
    date_with_delay = pd.Timestamp(date) + pd.Timedelta(days=reporting_delay)
    # Sort first: a label slice on a MultiIndex that is not lexsorted raises
    # an UnsortedIndexError. For already sorted input the result is unchanged.
    empirical_infections = empirical_infections.sort_index()[:date_with_delay]

    initial_before_date = [
        pd.Timestamp(col) <= date_with_delay for col in initial_infections
    ]
    assert all(initial_before_date), f"Initial infections must lie before {date}."
    index_cols = ["date", "county", "age_group_rki"]
    correct_index_levels = empirical_infections.index.names == index_cols
    assert correct_index_levels, f"Your data must have {index_cols} as index levels."
    duplicates_in_index = empirical_infections.index.duplicated().any()
    assert not duplicates_in_index, "Your index must not have any duplicates."

    # Anybody infected on at least one of the initial days is immune.
    endog_immune = initial_infections.any(axis=1)
    total_immune = empirical_infections.groupby(["age_group_rki", "county"]).sum()

    total_immunity_prob = _calculate_total_immunity_prob(
        total_immune,
        synthetic_data,
        population_size,
    )
    endog_immunity_prob = _calculate_endog_immunity_prob(
        initial_infections,
        synthetic_data,
    )
    exog_immunity_prob = _calculate_exog_immunity_prob(
        total_immunity_prob, endog_immunity_prob
    )

    # NOTE(review): this seeds numpy's global random state; presumably
    # boolean_choices draws from it - confirm against sid.shared.
    np.random.seed(seed)
    # need to duplicate exog prob on synthetical data
    hypothetical_exog_prob = pd.merge(
        synthetic_data,
        exog_immunity_prob,
        left_on=["age_group_rki", "county"],
        right_index=True,
        validate="m:1",
    )["exog_immunity_prob"]
    # Restore the row order of synthetic_data after the merge.
    hypothetical_exog_prob = hypothetical_exog_prob.reindex(synthetic_data.index)

    hypothetical_exog_choice = pd.Series(
        boolean_choices(hypothetical_exog_prob.to_numpy()),
        index=synthetic_data.index,
    )
    # Keep the random draw for individuals that are not endogenously immune and
    # use the endogenous value (True) for everybody else.
    return hypothetical_exog_choice.where(~endog_immune, endog_immune)
def _calculate_total_immunity_prob(total_immunity, synthetic_data, population_size):
    """Calculate the probability to be immune by county and age group.

    Args:
        total_immunity (pandas.Series): Index are the age group and county.
            Values are the total numbers of immune individuals. These must
            already include undetected cases.
        synthetic_data (pandas.DataFrame): DataFrame of synthetic individuals.
            Must contain age_group_rki and county as columns.
        population_size (int): Number of individuals in the population from
            which the total_immunity was calculated.

    Returns:
        immunity_prob (pandas.Series): Index are the age group and county
            combinations found in synthetic_data. Values are the probabilities
            of individuals of a particular county and age group to be immune.

    """
    group_keys = ["age_group_rki", "county"]
    # Each synthetic individual stands for this many real individuals.
    persons_per_row = population_size / len(synthetic_data)
    scaled_cell_sizes = synthetic_data.groupby(group_keys).size() * persons_per_row
    # Cells without any recorded immune individuals count as zero.
    immune_per_cell = total_immunity.reindex(scaled_cell_sizes.index).fillna(0)
    return immune_per_cell / scaled_cell_sizes
def _calculate_endog_immunity_prob(initial_infections, synthetic_data):
    """Calculate the immunity probability from initial infections.

    Args:
        initial_infections (pandas.DataFrame): DataFrame with same index as
            synthetic_data and one column for each day between start and end.
            Dtype is boolean.
        synthetic_data (pandas.DataFrame): Dataset with one row per simulated
            individual. Must contain the columns age_group_rki and county.

    Returns:
        prob_endog_immune (pandas.Series): Share of individuals that are
            infected on at least one day, by age group and county.

    """
    group_keys = ["age_group_rki", "county"]
    # An individual is endogenously immune if infected on any of the days.
    ever_infected = initial_infections.any(axis=1)
    cells = synthetic_data[group_keys].assign(endog_immune=ever_infected)
    return cells.groupby(group_keys)["endog_immune"].mean()
def _calculate_exog_immunity_prob(total_immunity_prob, endog_immunity_prob):
    """Conditional probability to be immune, given not endogenously immune.

    The raw value is ``(total - endog) / (1 - endog)``. The result is forced to
    be a valid probability:

    - Groups in which everybody is endogenously immune (``endog >= 1``) would
      divide by zero. No individual is left to draw for, so they get 0.
    - Values outside of [0, 1], e.g. because the endogenous share exceeds the
      total share, are clipped to that interval.

    Missing values that stem from missing inputs stay missing.

    Args:
        total_immunity_prob (pandas.Series): Probability to be immune by group.
        endog_immunity_prob (pandas.Series): Probability to be endogenously
            immune by group.

    Returns:
        pandas.Series: Series named "exog_immunity_prob" with values in
            [0, 1], indexed by the union of both input indices.

    """
    # Align explicitly so that the boolean mask below matches the result index.
    total, endog = total_immunity_prob.align(endog_immunity_prob)
    not_endog_share = 1 - endog

    sr = (total - endog) / not_endog_share
    # Replace the NaN / +-inf caused by the division by zero.
    sr[not_endog_share <= 0] = 0.0
    sr = sr.clip(lower=0.0, upper=1.0)
    sr.name = "exog_immunity_prob"
    return sr