# Source code for src.testing.create_rapid_test_statistics

import itertools

import numpy as np
import pandas as pd
from sid.rapid_tests import _sample_test_outcome

from src.config import POPULATION_GERMANY
from src.policies.policy_tools import combine_dictionaries


def create_rapid_test_statistics(demand_by_channel, states, date, params):
    """Calculate the rapid test statistics.

    Args:
        demand_by_channel (pandas.DataFrame): same index as states. Each column
            is one channel through which rapid tests can be demanded.
        states (pandas.DataFrame): sid states DataFrame.
        date (pandas.Timestamp or str): date
        params (pandas.DataFrame): parameter DataFrame that contains the
            sensitivity and specificity of the rapid tests

    Returns:
        statistics (pandas.DataFrame): DataFrame with just one column named 0.
            The index is named "index" and contains "date" plus, for every
            channel in ``demand_by_channel`` and for the additional "overall"
            channel, the entries produced by
            ``_calculate_rapid_test_statistics_by_channel``.

    """
    statistics = {
        "date": date,
    }

    # Work on a copy so that adding the "overall" column does not modify the
    # caller's DataFrame.
    demand_by_channel = demand_by_channel.copy()
    # "overall" is True for everyone who demands a test through any channel.
    demand_by_channel["overall"] = demand_by_channel.any(axis=1)

    for channel in demand_by_channel.columns:
        # Because we don't know the seed with which _sample_test_outcome will
        # be called in sid, these results will not be exactly equal to the
        # test outcomes in sid, but most channels easily exceed the number of
        # tests for randomness to be relevant.
        rapid_test_results = _sample_test_outcome(
            states=states,
            receives_rapid_test=demand_by_channel[channel],
            params=params,
            seed=itertools.count(93894),
        )

        channel_statistics = _calculate_rapid_test_statistics_by_channel(
            states=states,
            rapid_test_results=rapid_test_results,
            receives_rapid_test=demand_by_channel[channel],
            channel_name=channel,
        )

        statistics = combine_dictionaries([statistics, channel_statistics])

    statistics = pd.Series(statistics).to_frame()
    statistics.index.name = "index"
    return statistics
def _calculate_rapid_test_statistics_by_channel(
    states,
    rapid_test_results,
    receives_rapid_test,
    channel_name,
):
    """Calculate the rapid test statistics for one channel or overall.

    Naming convention for the statistics:

    - testshare -> count / n_tests
    - popshare -> count / n_people
    - number -> POPULATION_GERMANY * count / n_people

    where ``n_people`` is ``len(states)`` and ``n_tests`` is the number of
    individuals that receive a rapid test in this channel.

    Args:
        states (pandas.DataFrame): sid states DataFrame. Must contain the
            column "currently_infected".
        rapid_test_results (pandas.Series): boolean Series with the result for
            each individual. This is False for individuals that were not
            tested.
        receives_rapid_test (pandas.Series): boolean Series with the same index
            as states. This is the demand Series for one channel or overall.
        channel_name (str): name of the channel.

    Returns:
        dict: For each outcome (tested, tested_positive, tested_negative,
            true_positive, true_negative, false_positive, false_negative) the
            keys ``number_{outcome}_by_{channel_name}``,
            ``popshare_{outcome}_by_{channel_name}`` and
            ``testshare_{outcome}_by_{channel_name}``. The testshare entries
            are NaN if nobody was tested in this channel.

    """
    n_obs = len(states)
    infected = states["currently_infected"]

    # Full-length masks restricted to tested individuals. Masking with
    # receives_rapid_test keeps untested individuals (whose result is False)
    # out of tested_negative. Assumes all Series share the index of states,
    # as documented above.
    tested_positive = rapid_test_results & receives_rapid_test
    tested_negative = ~rapid_test_results & receives_rapid_test

    # Use NaN as denominator when nobody was tested so that the test shares
    # become NaN instead of dividing by zero.
    n_tested = receives_rapid_test.sum()
    n_tested = n_tested if n_tested != 0 else np.nan

    individual_outcomes = {
        "tested": receives_rapid_test,
        "tested_positive": tested_positive,
        "tested_negative": tested_negative,
        "true_positive": tested_positive & infected,
        "true_negative": tested_negative & ~infected,
        "false_positive": tested_positive & ~infected,
        "false_negative": tested_negative & infected,
    }

    statistics = {}
    for name, sr in individual_outcomes.items():
        count = sr.sum()
        statistics[f"number_{name}_by_{channel_name}"] = (
            POPULATION_GERMANY * count / n_obs
        )
        statistics[f"popshare_{name}_by_{channel_name}"] = count / n_obs
        statistics[f"testshare_{name}_by_{channel_name}"] = count / n_tested
    return statistics