Source code for covid_19_simulations.two_d

import numpy as np
import pandas as pd
from numba import jit

from math import sqrt
from tqdm import tqdm


@jit(nopython=True)
def find_first(searched, vec):
    """Return the position of the first element of ``vec`` equal to ``searched``.

    ``vec`` is scanned from the front and the scan stops at the first match.
    If no element compares equal to ``searched``, -1 is returned.
    """
    for position in range(len(vec)):
        if searched == vec[position]:
            return position
    return -1
def do_multiplier(x, y, power, thresh, df_sorted, df2D_test, farthest_calc):
    """Compute distance-based multipliers for the neighbours of the point (x, y).

    The point is located in ``df_sorted`` by its x value, a window of rows
    around that position is selected, and the Euclidean distance from (x, y)
    to each selected row of ``df2D_test`` is turned into a multiplier:
    1 where the distance is at most ``thresh``, ``distance ** -power``
    otherwise.

    Inputs:
        x, y : (float) coordinates of the point of interest.
        power : (float) exponent of the fall-off applied beyond ``thresh``.
        thresh : (float) distance up to which the multiplier is 1.
        df_sorted : (pandas DataFrame) frame with an ``x`` column whose row
            order defines which rows count as neighbours.
        df2D_test : (pandas DataFrame) frame with ``x`` and ``y`` columns,
            looked up by the index labels taken from ``df_sorted``.
        farthest_calc : (int) number of rows to take on each side of the
            point's position in ``df_sorted``.

    Outputs:
        multiplier_col : (numpy.ndarray) one multiplier per selected row.
        calc_indices : (pandas Index) index labels of the selected rows.

    Raises:
        ValueError : if ``x`` does not occur in ``df_sorted.x``.
    """
    x = np.float64(x)
    y = np.float64(y)
    power = np.float64(power)
    thresh = np.float64(thresh)
    # np.int was removed from NumPy; the builtin int does the same conversion.
    half_window = int(farthest_calc)

    place_in_sorted = find_first(x, df_sorted.x.values)
    if place_in_sorted < 0:
        # find_first signals "not found" with -1; slicing with it would
        # silently pick an unrelated window at the end of the frame.
        raise ValueError(f"x={x!r} is not present in df_sorted.x")

    # Clamp the start: a negative slice start would wrap around to the end of
    # the index and produce an empty (or wrong) window for rows near the top.
    start = max(place_in_sorted - half_window, 0)
    stop = place_in_sorted + half_window
    calc_indices = df_sorted.index[start:stop]

    dx = df2D_test.loc[calc_indices, 'x'] - x
    dy = df2D_test.loc[calc_indices, 'y'] - y
    dists = np.sqrt(dx ** 2 + dy ** 2)

    # np.where evaluates both branches, so zero distances hit 0 ** -power;
    # those entries are discarded by the condition, so silence the warning.
    with np.errstate(divide='ignore'):
        multiplier_col = np.where(dists > thresh,
                                  np.power(dists, -power),
                                  np.ones_like(dists))
    return multiplier_col, calc_indices
def distance(frame, ind1, ind2):
    """Euclidean distance between two rows of ``frame``.

    ``ind1`` and ``ind2`` are positions into the ``x`` and ``y`` columns'
    underlying value arrays; the distance between the two (x, y) pairs
    found there is returned.
    """
    xs = frame['x'].values
    ys = frame['y'].values
    delta_x = xs[ind1] - xs[ind2]
    delta_y = ys[ind1] - ys[ind2]
    return sqrt(delta_x**2 + delta_y**2)
def infect2D(df, trans_rate, day_name, thresh, power, df_sorted, farthest_calc):
    """
    Simulates a single day of infection in 2D.

    NOTE: a 0 counts as infected, while a 1 is healthy.

    Inputs:
        df : (pandas DataFrame) object holding all values of infected people.
                Must contain "x" and "y" columns and the column
                "infected day {day_name - 1}". In the infected columns, a 0
                counts as infected, while a 1 is healthy.
        trans_rate : (float) rate of transmission between individuals.
                infection is performed in a probabilistic manner, casting it as
                a draw from a binomial distribution with a rate of
                1 - multiplier * trans_rate, where the multiplier comes from
                do_multiplier.
        day_name : (float or int) the day of this infection, used to create
                a new column in the dataframe tracking the day's infections.
        thresh : (float) distance threshold forwarded to do_multiplier.
        power : (float) fall-off exponent forwarded to do_multiplier.
        df_sorted : (pandas DataFrame) sorted frame forwarded to do_multiplier
                to select each infected individual's neighbours.
        farthest_calc : (int) neighbour window size forwarded to do_multiplier.

    Outputs:
        df : (pandas DataFrame) object, same as the input (modified in place),
                but with a new column holding this day's infected results.
    """
    today = f"infected day {day_name}"
    df[today] = df[f"infected day {day_name - 1}"].copy()

    # Snapshot of who is infected at the start of the day; people infected
    # during the loop below do not spread the infection until the next day.
    infected = df[df[today] == 0.]

    if len(infected) == len(df):  # everyone is infected
        return df

    for x, y in zip(infected['x'].values, infected['y'].values):
        # find distance multiplier between this individual and its neighbours
        multiplier_col, calc_indices = do_multiplier(x, y, power, thresh,
                                                     df_sorted, df,
                                                     farthest_calc)

        # Probability of staying healthy. The multiplier can exceed 1 (a
        # distance between thresh and 1 when thresh < 1), which would push the
        # probability below 0 and make binomial raise; clip to a valid range.
        p = np.clip(1 - (multiplier_col * trans_rate), 0.0, 1.0)

        infect_col = np.random.binomial(size=len(calc_indices), p=p, n=1)  # 1 toss

        # A draw of 0 means the neighbour became infected today.
        df.loc[calc_indices[infect_col == 0], today] = 0

    return df
def _estimate_farthest_calc(df_sorted, thresh):
    """Pick the row position whose distance from a reference row is closest to thresh."""
    last = len(df_sorted) - 1
    if last < 1:
        # Fewer than two rows: there is nothing to measure against.
        return 1

    # The original heuristic probes positions 1..1000 against row 100; bound
    # both by the population size so small populations do not raise IndexError.
    # For more than 1000 rows this is identical to the unbounded version.
    reference = min(100, last)
    candidates = np.round(np.linspace(1, 1000, 100)).astype(int)
    candidates = candidates[candidates <= last]

    dists = np.array([distance(df_sorted, reference, int(pos))
                      for pos in candidates])
    return int(candidates[np.argmin(np.abs(dists - thresh))])


def simulate2D(N, trans_rate, t_steps, N_initial, thresh, power, distrib_pop,
               distrib_infec, kwargs_for_pop=None, kwargs_for_infec=None):
    """
    Simulates an infection run in 2D.

    Inputs:
        N : (int) number of initially healthy individuals in the system.
        trans_rate : (float) rate of transmission between individuals.
                infection is performed in a probabilistic manner, casting it as
                a draw from a binomial distribution with a rate of
                1 - trans_rate.
        t_steps : (int) number of time steps ("days") to consider. Day 0 is
                the initial state, so days 1 .. t_steps - 1 are simulated.
        N_initial : (int) number of initially infected individuals. These are
                added on top of the N healthy ones, so the returned frame has
                N + N_initial rows.
        thresh : (float) distance less than which infection is transmitted at
                the trans_rate; that is, less than which the distance
                multiplier is 1. At a distance greater than this, the
                multiplier is 1/distance^power.
        power : (float) Greater than 0. Power to which the multiplier falls
                off if the distance is greater than some threshold.
        distrib_pop : (func) distribution function to determine how
                individuals are initialized.
        distrib_infec : (func) distribution function to determine how
                initial infections are initialized.
        kwargs_for_pop : (dict or None) keyword arguments passed to the
                distrib_pop distribution type. Size not included.
        kwargs_for_infec : (dict or None) keyword arguments passed to the
                distrib_infec distribution type. Size not included.

    Outputs:
        df : (pandas DataFrame) object holding all values of infected people.
                Each column of "infected day _" corresponds to a different day,
                with "_" being some integer or float. The "name" column assigns
                a name to each object, independent of index. In the infected
                columns, a 0 counts as infected, while a 1 is healthy. The
                frame also carries the "x", "y" and "Rank" columns.
    """
    # None instead of {} as default avoids sharing one dict across calls.
    kwargs_for_pop = {} if kwargs_for_pop is None else kwargs_for_pop
    kwargs_for_infec = {} if kwargs_for_infec is None else kwargs_for_infec

    # making separate name column because indices get messy.
    pop = initialize_pop_2D(N, distrib_pop, **kwargs_for_pop)
    df = pd.DataFrame(data={'name': np.arange(N),
                            'infected day 0': np.ones(N),
                            'x': pop[:, 0],
                            'y': pop[:, 1]})

    pop_infec = initialize_pop_2D(N_initial, distrib_infec, **kwargs_for_infec)
    df_infec = pd.DataFrame(data={'name': np.arange(N, N + N_initial),
                                  'infected day 0': np.zeros(N_initial),
                                  'x': pop_infec[:, 0],
                                  'y': pop_infec[:, 1]})

    # DataFrame.append no longer exists in pandas 2.x; concat is equivalent.
    df = pd.concat([df, df_infec], ignore_index=True)

    # Order individuals by the sum of their x and y ranks so that a window of
    # positions in this order can stand in for a spatial neighbourhood.
    df['Rank'] = df.x.rank() + df.y.rank()
    df_sorted = df.sort_values('Rank', ascending=False).drop('Rank', axis=1)

    # determines the farthest index to calculate distance from a given point.
    farthest_calc = _estimate_farthest_calc(df_sorted, thresh)

    for t in tqdm(range(1, t_steps), position=0, leave=True):
        df = infect2D(df, trans_rate, t, thresh, power, df_sorted,
                      farthest_calc)

    return df
def initialize_pop_2D(N, distrib, **kwargs):
    """
    Draw a population from ``distrib``.

    ``distrib`` is called with ``size=(N, 2)`` plus any extra keyword
    arguments, and whatever it returns is handed back unchanged. ``size``
    must therefore not be supplied through ``kwargs``.

    Note from the original author: this will change once we have the
    U.S. map.
    """
    shape = (N, 2)
    return distrib(size=shape, **kwargs)