This repository was archived by the owner on Sep 30, 2025. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathcovid_plot.py
More file actions
90 lines (73 loc) · 2.95 KB
/
covid_plot.py
File metadata and controls
90 lines (73 loc) · 2.95 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
#
# covid_plot.py - plot covid-19 stats
#
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd # Using numpy would require a lot of work!! ;)
# date processing
from datetime import datetime
import re
def dateconv(date_str):
    """
    Parse an ``m/d/yy`` date string into a one-element tuple holding a numpy date.

    :param date_str: date text in ``%m/%d/%y`` form, e.g. ``'1/22/20'``
    :returns: a 1-tuple ``(numpy.datetime64,)`` -- NOT a bare date; callers
              must unpack the single element
    :raises ValueError: if ``date_str`` does not match ``%m/%d/%y``
    """
    # ref: https://docs.python.org/3/library/datetime.html#strftime-and-strptime-behavior
    parsed = datetime.strptime(date_str, '%m/%d/%y')
    # The original code produced this tuple through a trailing comma after the
    # return expression; it is spelled out here so the shape is not mistaken
    # for a typo. The return shape is kept because existing callers unpack it.
    return (np.datetime64(parsed),)
# This fails because of inconsistent data formatting. Resorting to pandas instead,
# then converting to a numpy array.
# np_data = np.recfromcsv('./covid_19_clean_complete.csv', delimiter=',', dtype=None, encoding="utf8", names=True,
# Load the selected CSV columns into a record array.
# np.recfromcsv is not available from the top-level namespace in NumPy 2.0, so
# call np.genfromtxt directly with the defaults recfromcsv applied (comma
# delimiter, header row as field names, lower-cased names, inferred dtypes)
# and view the result as a recarray, which is what recfromcsv returned.
np_data = np.genfromtxt(
    'covid_19_clean_complete_with_hints.csv',
    delimiter=',',
    dtype=None,
    encoding="utf8",
    names=True,
    case_sensitive='lower',
    # Converters are keyed by the column index in the file, not in usecols.
    converters={4: dateconv, 5: int, 6: int, 7: int},
    # converters={5: int, 6: int, 7: int},
    usecols=(1, 4, 5, 6, 7),
    missing_values={0: '???'},
    filling_values={0: 'Unknown'},
).view(np.recarray)
# data = pd.read_csv('covid_19_data.csv', delimiter=',').to_numpy() # read data to numpy array
# data = pd.read_csv(
# './covid_19_clean_complete.csv', # file to open
# delimiter=',', # csv files use , delimiter
# usecols=(1,4,5,6,7) # 5 columns [Country, Date, Confirmed, Deaths, Recovered]
# ) # read data to numpy array
# Display the data
# indexes
# Positional indexes of the fields inside each loaded record.
country, date, confirmed, deaths, recovered = 0, 1, 2, 3, 4

# Keep only the records whose country field is Australia.
aussie_cases = np.array([s for s in np_data if s[country] == "Australia"])
# aussie_cases = np_data
num_cases = len(aussie_cases)

# Pre-allocated series, one slot per Australian record.
cumulative_conf = np.zeros(num_cases, dtype=int)
cumulative_dth = np.zeros(num_cases, dtype=int)
cumulative_rec = np.zeros(num_cases, dtype=int)
log_conf = np.zeros(num_cases)
log_dth = np.zeros(num_cases)
log_rec = np.zeros(num_cases)
# Record index 0..num_cases-1. np.linspace(0, n, n, dtype=int) was used before,
# which includes the endpoint n and truncates uneven steps instead of counting.
dates = np.arange(num_cases, dtype=int)
actual_dates = np.zeros(num_cases, dtype='datetime64[D]')  # https://stackoverflow.com/questions/27469031/cannot-populate-numpy-datetime64-arrays
growth_rate = np.zeros(num_cases)
prev_case = 0

# Australian cases
print("Cases -> ", num_cases)

# TODO - Predict the iterations
for index, case in enumerate(aussie_cases):
    # The date converter yields a one-element tuple, so unpack it.
    (actual_dates[index], ) = case[date]  # Returns a tuple
    if index == 0:
        # Was `==` (a discarded comparison), which left the first slot at 0.
        cumulative_conf[index] = case[confirmed]
        growth_rate[index] = 1
    else:
        cumulative_conf[index] = cumulative_conf[index - 1] + case[confirmed]
        # NOTE(review): prev_case can be 0. Presumably the fields are numpy
        # integers, so this yields inf/nan (with a RuntimeWarning) rather than
        # raising; nan_to_num below cleans those values -- confirm.
        growth_rate[index] = case[confirmed] / prev_case
    # Remember this record's confirmed count for the next growth ratio.
    prev_case = case[confirmed]

clean_growth = np.nan_to_num(growth_rate)
print(cumulative_conf)
# mean_growth = np.mean(clean_growth)
# max_growth = np.max(clean_growth)

plt.title("Covid-19")
# plt.yscale("log")
plt.plot(actual_dates, cumulative_conf, 'g')
plt.show()