|
| 1 | +# %% |
| 2 | +%pip install -q openmeteo-requests retry-requests requests-cache ipyleaflet |
| 3 | + |
| 4 | +# %% |
| 5 | +from pathlib import Path |
| 6 | +from ipyleaflet import Map, Marker |
| 7 | +import openmeteo_requests |
| 8 | + |
| 9 | +import pandas as pd |
| 10 | +import requests_cache |
| 11 | +from retry_requests import retry |
| 12 | + |
| 13 | +# %% |
# Candidate list of 10 medium to large urban areas with their GPS coordinates,
# chosen to cover the most populous areas in France. Only the uncommented
# entries are active: one weather file is downloaded per active city below.
cities = [
    {"name": "Paris", "latitude": 48.8566, "longitude": 2.3522},
    {"name": "Lyon", "latitude": 45.7640, "longitude": 4.8357},
    # {"name": "Marseille", "latitude": 43.2965, "longitude": 5.3698},
    # {"name": "Toulouse", "latitude": 43.6047, "longitude": 1.4442},
    # {"name": "Lille", "latitude": 50.6292, "longitude": 3.0573},
    # {"name": "Limoges", "latitude": 45.8336, "longitude": 1.2616},
    # {"name": "Nantes", "latitude": 47.2184, "longitude": -1.5536},
    # {"name": "Strasbourg", "latitude": 48.5734, "longitude": 7.7521},
    # {"name": "Brest", "latitude": 48.3904, "longitude": -4.4861},
    # {"name": "Bayonne", "latitude": 43.4833, "longitude": -1.4667},
]

# Preview the active cities on an interactive map, one marker per city.
map_center = [46.6034, 1.8883]  # Approximate center of France
m = Map(center=map_center, zoom=6)
for city in cities:
    marker = Marker(location=(city["latitude"], city["longitude"]), title=city["name"])
    # `Map.add` replaces the deprecated `Map.add_layer`.
    m.add(marker)
m  # Trailing expression so that the notebook cell renders the map.
| 35 | + |
| 36 | + |
| 37 | +# %% |
| 38 | + |
def download_weather_data(city, *, start_date="2021-01-01", end_date="2025-05-31"):
    """Download hourly weather data for one city from the Open-Meteo API.

    Parameters
    ----------
    city : dict
        Mapping providing at least the "latitude" and "longitude" keys.
    start_date : str, default="2021-01-01"
        First day of the requested period, forwarded as-is to the API.
    end_date : str, default="2025-05-31"
        Last day of the requested period, forwarded as-is to the API.

    Returns
    -------
    pandas.DataFrame
        A "time" column (UTC timestamps) followed by one column per requested
        hourly variable, in the order they are listed in this function.
    """
    # Single source of truth for the requested variables: the API returns the
    # values in the order they were requested, so the same list drives both
    # the request and the extraction below.
    hourly_variables = [
        "temperature_2m",
        "precipitation",
        "wind_speed_10m",
        "cloud_cover",
        "soil_moisture_1_to_3cm",
        "relative_humidity_2m",
    ]

    # HTTP session with a local cache (entries expire after 3600 seconds) and
    # automatic retries with backoff on failure.
    cache_session = requests_cache.CachedSession(".cache", expire_after=3600)
    retry_session = retry(cache_session, retries=5, backoff_factor=0.2)
    openmeteo = openmeteo_requests.Client(session=retry_session)

    url = "https://historical-forecast-api.open-meteo.com/v1/forecast"
    params = {
        "latitude": city["latitude"],
        "longitude": city["longitude"],
        "start_date": start_date,
        "end_date": end_date,
        "hourly": hourly_variables,
        "timezone": "GMT",  # Use GMT to ease temporal joins.
    }
    # A single location is requested, hence only the first response is used.
    response = openmeteo.weather_api(url, params=params)[0]
    hourly = response.Hourly()

    # Rebuild the time index from the start, end and step reported by the
    # response; the end bound is excluded.
    hourly_data = {
        "time": pd.date_range(
            start=pd.to_datetime(hourly.Time(), unit="s", utc=True),
            end=pd.to_datetime(hourly.TimeEnd(), unit="s", utc=True),
            freq=pd.Timedelta(seconds=hourly.Interval()),
            inclusive="left",
        )
    }
    for index, variable_name in enumerate(hourly_variables):
        hourly_data[variable_name] = hourly.Variables(index).ValuesAsNumpy()
    return pd.DataFrame(data=hourly_data)
| 89 | + |
| 90 | + |
| 91 | +# %% |
# Download the weather data of each active city into its own parquet file,
# skipping the cities whose file is already present.
datasets_folder = Path("../datasets")
# Make sure the output folder exists, otherwise writing the first parquet
# file fails when the folder has not been created yet.
datasets_folder.mkdir(parents=True, exist_ok=True)
for city in cities:
    filepath = datasets_folder / f"weather_{city['name'].lower()}.parquet"
    if filepath.exists():
        print(f"Weather data for {city['name']} already exists at {filepath}.")
        continue

    print(f"Downloading weather data for {city['name']}...")
    df = download_weather_data(city)
    df.to_parquet(filepath, index=False)
    print(f"Weather data for {city['name']} saved to {filepath}.")
| 103 | + |
| 104 | +# %% |
0 commit comments