"""Build a regularly spaced weekly national series from an ILINet CSV export.

Run as a script, this reads ``data/Flu_USA/ILINet.csv``, keeps the national
rows, derives a ``date`` column from ``YEAR``/``WEEK``, fills two-week holes
by repeating the preceding observation, and writes the result to
``data/Flu_USA/Flu_USA.csv``.
"""

import pandas as pd

DEFAULT_INPUT_PATH = "data/Flu_USA/ILINet.csv"
DEFAULT_OUTPUT_PATH = "data/Flu_USA/Flu_USA.csv"


def _fill_two_week_gaps(df):
    """Return a copy of *df* with a filler row inserted into every 14-day gap.

    Whenever a row's ``date`` is exactly 14 days before the next row's
    ``date``, a duplicate of that row dated 7 days later is inserted directly
    after it. Rows are compared in their existing order; the frame is not
    sorted first. Filler rows keep the index label of the row they copy.

    An empty frame is returned unchanged (as a copy).
    """
    if df.empty:
        return df.copy()

    # Whole-day distance from each row to its successor (NaN for the last row).
    days_to_next = (df["date"].shift(-1) - df["date"]).dt.days
    gap_follows = days_to_next.eq(14).to_numpy()

    # Positional take list: every row once, plus a second time when a filler
    # is needed; `day_offsets` records how far each taken row must be shifted.
    positions = []
    day_offsets = []
    for pos, needs_filler in enumerate(gap_follows):
        positions.append(pos)
        day_offsets.append(0)
        if needs_filler:
            positions.append(pos)
            day_offsets.append(7)

    filled = df.iloc[positions].copy()
    # Plain arrays on both sides keep the addition positional; the index of
    # `filled` contains duplicate labels, so label alignment is avoided.
    filled["date"] = (
        filled["date"].to_numpy()
        + pd.to_timedelta(day_offsets, unit="D").to_numpy()
    )
    return filled


def main(input_path=DEFAULT_INPUT_PATH, output_path=DEFAULT_OUTPUT_PATH):
    """Convert the ILINet export at *input_path* into a weekly CSV.

    Steps:
      1. Keep rows whose ``REGION TYPE`` is ``"National"`` and whose ``WEEK``
         is not 53.
      2. Parse ``YEAR``/``WEEK`` into ``date`` using ``%Y-W%U-%w`` with a
         weekday of 1, i.e. the Monday of the ``%U``-numbered week.
      3. Insert filler rows into 14-day gaps.
      4. Drop ``YEAR``/``WEEK``, print diagnostics about the spacing, and
         write the frame to *output_path* without the index.

    NOTE(review): the week-53 drop and the gap filling presumably exist to
    compensate for ``%U`` week numbering around year boundaries -- confirm
    against the data before changing either.
    """
    raw = pd.read_csv(input_path)

    wanted = (raw["REGION TYPE"] == "National") & (raw["WEEK"] != 53)
    # Copy so that adding the `date` column does not write into a slice.
    df = raw[wanted].copy()

    df["date"] = pd.to_datetime(
        df["YEAR"].astype(str) + "-W" + df["WEEK"].astype(str) + "-1",
        format="%Y-W%U-%w",
    )

    df = _fill_two_week_gaps(df)
    df = df.drop(columns=["YEAR", "WEEK"])

    gaps = df["date"].diff().dropna().unique()
    print("Unique time intervals:", gaps)

    # Temporary column so the printed rows show the offending interval.
    df["time_diff"] = df["date"].diff()
    rows_with_14_days = df[df["time_diff"] == pd.Timedelta(days=14)]
    print(rows_with_14_days)
    df = df.drop(columns=["time_diff"])

    # infer_freq raises ValueError when given fewer than three dates.
    inferred_freq = pd.infer_freq(df["date"]) if len(df) >= 3 else None
    print(f"Infered frequency: {inferred_freq}")

    df.to_csv(output_path, index=False)

    # Report the real destination rather than a placeholder file name.
    print(f"Data saved to {output_path}")


if __name__ == "__main__":
    main()