Skip to content

Commit 9b57cb7

Browse files
authored
Merge pull request #2075 from hackforla/ssiegal.fallback-to-prior-year
Fix unit tests and parameterize dataset year
2 parents 65fd816 + 6eb0289 commit 9b57cb7

7 files changed

Lines changed: 67 additions & 83 deletions

File tree

backend/DbProvider.jsx

Lines changed: 14 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -11,25 +11,6 @@ import DbContext from '@db/DbContext';
1111
import moment from 'moment';
1212

1313
const hf_account = import.meta.env.VITE_ENV === 'DEV' ? '311-Data-Dev' : '311-data';
14-
15-
// List of remote dataset locations used by db.registerFileURL
16-
const datasets = {
17-
parquet: {
18-
// huggingface
19-
hfYtd2025:
20-
`https://huggingface.co/datasets/${hf_account}/2025/resolve/main/2025.parquet`, // 2025 year-to-date
21-
hfYtd2024:
22-
`https://huggingface.co/datasets/${hf_account}/2024/resolve/main/2024.parquet`, // 2024 entire year
23-
hfYtd2023:
24-
`https://huggingface.co/datasets/${hf_account}/2023/resolve/main/2023.parquet`, // 2023 entire year
25-
hfYtd2022:
26-
`https://huggingface.co/datasets/${hf_account}/2022/resolve/main/2022.parquet`, // 2022 entire year
27-
hfYtd2021:
28-
`https://huggingface.co/datasets/${hf_account}/2021/resolve/main/2021.parquet`, // 2021 entire year
29-
hfYtd2020:
30-
`https://huggingface.co/datasets/${hf_account}/2020/resolve/main/2020.parquet`, // 2020 entire year
31-
},
32-
};
3314
function DbProvider({ children, startDate }) {
3415
const [db, setDb] = useState(null);
3516
const [conn, setConn] = useState(null);
@@ -65,43 +46,20 @@ function DbProvider({ children, startDate }) {
6546
DUCKDB_CONFIG.pthreadWorker,
6647
);
6748

68-
// register parquet
69-
await newDb.registerFileURL(
70-
'requests2025.parquet',
71-
//* Quick fix - change hfYtd2024 to hfYtd2025 when 2025 data available
72-
datasets.parquet.hfYtd2025,
73-
4, // HTTP = 4. For more options: https://tinyurl.com/DuckDBDataProtocol
74-
);
75-
76-
await newDb.registerFileURL(
77-
'requests2024.parquet',
78-
datasets.parquet.hfYtd2024,
79-
4, // HTTP = 4. For more options: https://tinyurl.com/DuckDBDataProtocol
80-
);
81-
82-
await newDb.registerFileURL(
83-
'requests2023.parquet',
84-
datasets.parquet.hfYtd2023,
85-
4,
86-
);
87-
88-
await newDb.registerFileURL(
89-
'requests2022.parquet',
90-
datasets.parquet.hfYtd2022,
91-
4,
92-
);
93-
94-
await newDb.registerFileURL(
95-
'requests2021.parquet',
96-
datasets.parquet.hfYtd2021,
97-
4,
98-
);
99-
100-
await newDb.registerFileURL(
101-
'requests2020.parquet',
102-
datasets.parquet.hfYtd2020,
103-
4,
104-
);
49+
// Register parquet files for years 2020 to current year
50+
const currentYear = new Date().getFullYear();
51+
for (let year = 2020; year <= currentYear; year++) {
52+
const datasetUrl = `https://huggingface.co/datasets/${hf_account}/${year}/resolve/main/${year}.parquet`;
53+
try {
54+
await newDb.registerFileURL(
55+
`requests${year}.parquet`,
56+
datasetUrl,
57+
4, // HTTP = 4. For more options: https://tinyurl.com/DuckDBDataProtocol
58+
);
59+
} catch (err) {
60+
console.warn(`Failed to register dataset for year ${year}:`, err);
61+
}
62+
}
10563

10664
// Create db connection
10765
const newConn = await newDb.connect();

package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
"lint": "eslint \"./**/*.js*\"",
1818
"lint:fix": "eslint --fix \"./**/*.js*\"",
1919
"test": "vitest",
20-
"check-env": "node ./utils/checkEnv"
20+
"check-env": "node ./src/utils/checkEnv"
2121
},
2222
"husky": {
2323
"hooks": {

src/features/Map/index.jsx

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -70,10 +70,11 @@ class MapContainer extends React.Component {
7070

7171
createRequestsTable = async () => {
7272
this.setState({ isTableLoading: true });
73-
const { tableNameByYear, setDbStartTime } = this.context;
74-
const startDate = this.props.startDate; // directly use the startDate prop transformed for redux store
75-
const year = moment(startDate).year(); // extract the year
76-
const datasetFileName = `requests${year}.parquet`;
73+
const { setDbStartTime } = this.context;
74+
const { startDate } = this.props;
75+
let year = moment(startDate).year(); // extract the year
76+
const tableNameByYear = `requests_${year}`;
77+
let datasetFileName = `requests${year}.parquet`;
7778

7879
// Create the year's data table if it does not already exist
7980
const createSQL = `CREATE TABLE IF NOT EXISTS ${tableNameByYear} AS SELECT * FROM '${datasetFileName}'`; // query from parquet
@@ -90,13 +91,11 @@ class MapContainer extends React.Component {
9091
)} ms.`
9192
);
9293
} catch (error) {
93-
console.error("Error in creating table or registering dataset:", error);
94-
} finally {
95-
this.setState({ isTableLoading: false });
94+
console.error(`Failed to load dataset for year ${year}:`, error);
9695
}
9796
};
9897

99-
async componentDidMount(props) {
98+
async componentDidMount() {
10099
this.isSubscribed = true;
101100
this.processSearchParams();
102101
if (DATA_SOURCE !== "SOCRATA") await this.createRequestsTable();

src/scripts/updateHfDataset.py

Lines changed: 37 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,18 @@
55
from tqdm import tqdm
66
from huggingface_hub import HfApi, login
77
from dotenv import load_dotenv
8+
from datetime import datetime
89
load_dotenv()
910

11+
# Lookup table for data URLs by year
12+
DATA_URLS = {
13+
'2025': 'https://data.lacity.org/api/views/h73f-gn57/rows.csv?accessType=DOWNLOAD',
14+
'2026': 'https://data.lacity.org/api/v3/views/2cy6-i7zn/query.csv',
15+
}
16+
17+
def get_current_year():
18+
return str(datetime.now().year)
19+
1020
# set environment as 'dev' or 'prod'
1121
ENV = os.getenv('VITE_ENV')
1222

@@ -21,26 +31,37 @@
2131

2232
def dlData():
2333
'''
24-
Download the current year's dataset from data.lacity.org
34+
Download the current year's dataset from data.lacity.org.
35+
Returns the year, so it can be passed to subsequent steps.
2536
'''
26-
url = "https://data.lacity.org/api/views/h73f-gn57/rows.csv?accessType=DOWNLOAD"
27-
outfile = "2025.csv"
37+
year = get_current_year()
38+
if year not in DATA_URLS:
39+
raise ValueError(f"No data URL configured for year {year}")
40+
url = DATA_URLS[year]
41+
outfile = f"{year}.csv"
2842

2943
response = requests.get(url, stream=True)
3044

45+
# If we get a 4xx or 5xx HTTP status, raise an exception and stop processing altogether
46+
response.raise_for_status()
47+
3148
# Save downloaded file
3249
with open(outfile, "wb") as file:
3350
for data in tqdm(response.iter_content()):
3451
file.write(data)
3552

53+
return year
54+
3655

37-
def hfClean():
56+
def hfClean(year=None):
3857
'''
3958
Clean the dataset by removing problematic string combinations and update timestamp to ISO format
4059
'''
41-
infile = "2025.csv"
42-
fixed_filename = "2025-fixed.csv"
43-
clean_filename = "2025-clean.parquet"
60+
if year is None:
61+
year = get_current_year()
62+
infile = f"{year}.csv"
63+
fixed_filename = f"{year}-fixed.csv"
64+
clean_filename = f"{year}-clean.parquet"
4465

4566
# List of problematic strings to be replaced with ""
4667
replace_strings = ["VE, 0"]
@@ -65,13 +86,15 @@ def hfClean():
6586
print(f"File {infile} not found.")
6687

6788

68-
def hfUpload():
89+
def hfUpload(year=None):
6990
'''
7091
Upload the clean dataset to huggingface.co
7192
'''
72-
local_filename = '2025-clean.parquet'
73-
dest_filename = '2025.parquet'
74-
repo_name = '2025'
93+
if year is None:
94+
year = get_current_year()
95+
local_filename = f'{year}-clean.parquet'
96+
dest_filename = f'{year}.parquet'
97+
repo_name = f'{year}'
7598
repo_type = 'dataset'
7699

77100
repo_id = f"{HF_USERNAME}/{repo_name}"
@@ -95,9 +118,9 @@ def cleanUp():
95118

96119

97120
def main():
98-
dlData()
99-
hfClean()
100-
hfUpload()
121+
year = dlData()
122+
hfClean(year)
123+
hfUpload(year)
101124
cleanUp()
102125

103126

src/utils/checkEnv.js

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,8 @@ const red = '\x1b[31m%s\x1b[0m';
1313
const green = '\x1b[32m%s\x1b[0m';
1414
const vitePrefix = 'VITE_';
1515

16-
const envPath = path.resolve(__dirname, '../.env');
17-
const exampleEnvPath = path.resolve(__dirname, '../.example.env');
16+
const envPath = path.resolve(__dirname, '../../.env');
17+
const exampleEnvPath = path.resolve(__dirname, '../../.example.env');
1818

1919
function getEnv(fileName) {
2020
return dotenv.parse(fs.readFileSync(fileName));

src/utils/test-utils.jsx

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,9 @@
11
import React from 'react'
22
import { render } from '@testing-library/react'
33
import { Provider } from 'react-redux'
4+
import { ThemeProvider } from '@mui/material/styles'
45
import { setupStore } from '../redux/store'
6+
import theme from '@theme/theme'
57

68
// https://redux.js.org/usage/writing-tests#setting-up-a-reusable-test-render-function
79
export function renderWithProviders(ui, extendedRenderOptions = {}) {
@@ -13,7 +15,9 @@ export function renderWithProviders(ui, extendedRenderOptions = {}) {
1315
} = extendedRenderOptions
1416

1517
const Wrapper = ({ children }) => (
16-
<Provider store={store}>{children}</Provider>
18+
<Provider store={store}>
19+
<ThemeProvider theme={theme}>{children}</ThemeProvider>
20+
</Provider>
1721
)
1822

1923
// Return an object with the store and all of RTL's query functions

vite.config.js

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ export default defineConfig(() => {
3131
},
3232
test: {
3333
environment: 'jsdom',
34-
setupFiles: 'utils/test-setup.js'
34+
setupFiles: 'src/utils/test-setup.js'
3535
}
3636
};
3737
});

0 commit comments

Comments
 (0)