# Source code for src.testing.create_rapid_test_statistics

import itertools

import numpy as np
import pandas as pd
from sid.rapid_tests import _sample_test_outcome

from src.config import POPULATION_GERMANY
from src.policies.policy_tools import combine_dictionaries


def create_rapid_test_statistics(demand_by_channel, states, date, params):
    """Calculate the rapid test statistics.

    For every demand channel, and for the union of all channels (stored under
    the name "overall"), test outcomes are sampled and summarised by
    ``_calculate_rapid_test_statistics_by_channel``.

    Args:
        demand_by_channel (pandas.DataFrame): same index as states. Each column is one
            channel through which rapid tests can be demanded. The caller's
            DataFrame is not modified; a copy is extended with "overall".
        states (pandas.DataFrame): sid states DataFrame.
        date (pandas.Timestamp or str): date
        params (pandas.DataFrame): parameter DataFrame that contains the sensitivity
            and specificity of the rapid tests

    Returns:
        statistics (pandas.DataFrame): DataFrame with just one column named 0. The
            index is named "index" and contains the entry "date" followed by the
            keys of the per-channel statistics (number, popshare and testshare
            of each outcome) for every channel and for "overall".

    """
    statistics = {
        "date": date,
    }

    # Work on a copy so the extra "overall" column never leaks to the caller.
    demand_by_channel = demand_by_channel.copy()
    # Someone demands a test overall if they demand one through any channel.
    demand_by_channel["overall"] = demand_by_channel.any(axis=1)

    for channel in demand_by_channel.columns:
        receives_rapid_test = demand_by_channel[channel]

        # because we don't know the seed with which sample_test_outcome will be called
        # with, these results will not be exactly equal to the test outcomes in sid but
        # most channels easily exceed the number of tests for randomness to be relevant
        rapid_test_results = _sample_test_outcome(
            states=states,
            receives_rapid_test=receives_rapid_test,
            params=params,
            # a fresh counter per channel: every channel starts from the same seed
            seed=itertools.count(93894),
        )

        channel_statistics = _calculate_rapid_test_statistics_by_channel(
            states=states,
            rapid_test_results=rapid_test_results,
            receives_rapid_test=receives_rapid_test,
            channel_name=channel,
        )
        statistics = combine_dictionaries([statistics, channel_statistics])

    # An unnamed Series converts to a frame whose single column is labelled 0.
    statistics = pd.Series(statistics).to_frame()
    statistics.index.name = "index"
    return statistics


def _calculate_rapid_test_statistics_by_channel(
    states,
    rapid_test_results,
    receives_rapid_test,
    channel_name,
):
    """Calculate the rapid test statistics for one channel or overall.

    Naming convention for the denominators:

        - testshare             -> n_tests
        - popshare              -> n_people
        - number                -> n_people / POPULATION_GERMANY

    Args:
        states (pandas.DataFrame): sid states DataFrame. Only its length and its
            boolean column "currently_infected" are used here.
        rapid_test_results (pandas.Series): boolean Series with the same index as
            states holding the result for each individual. This is False for
            individuals that were not tested.
        receives_rapid_test (pandas.Series): boolean Series with the same index as
            states. This is the demand Series for one channel or overall.
        channel_name (str): name of the channel.

    Returns:
        dict: for each outcome (tested, tested_positive, tested_negative,
            true_positive, true_negative, false_positive, false_negative) the
            keys ``number_{outcome}_by_{channel_name}``,
            ``popshare_{outcome}_by_{channel_name}`` and
            ``testshare_{outcome}_by_{channel_name}``. The testshare entries are
            NaN when nobody was tested in this channel.

    """
    n_obs = len(states)
    currently_infected = states["currently_infected"]

    # Keep both masks full length on the shared index. Restricting them to the
    # tested individuals first would force every combination with
    # ``currently_infected`` below to align a subset against the whole
    # population and to rely on the missing rows being treated as False.
    tested_positive = receives_rapid_test & rapid_test_results
    tested_negative = receives_rapid_test & ~rapid_test_results

    # NaN instead of 0 so that the test shares become NaN rather than dividing
    # by zero when no test was demanded through this channel.
    n_tested = receives_rapid_test.sum()
    n_tested = n_tested if n_tested != 0 else np.nan

    individual_outcomes = {
        "tested": receives_rapid_test,
        "tested_positive": tested_positive,
        "tested_negative": tested_negative,
        "true_positive": tested_positive & currently_infected,
        "true_negative": tested_negative & ~currently_infected,
        "false_positive": tested_positive & ~currently_infected,
        "false_negative": tested_negative & currently_infected,
    }

    statistics = {}
    for name, sr in individual_outcomes.items():
        n_cases = sr.sum()
        # scale the count in the simulated population up to the German population
        statistics[f"number_{name}_by_{channel_name}"] = (
            POPULATION_GERMANY * n_cases / n_obs
        )
        statistics[f"popshare_{name}_by_{channel_name}"] = n_cases / n_obs
        statistics[f"testshare_{name}_by_{channel_name}"] = n_cases / n_tested

    return statistics