Skip to content

Commit 2b146ce

Browse files
authored
Merge pull request #12 from escapecloud/task/update_alternative_technology_dataset
Issue #9 – Fetch Alternative Technology data from remote storage
2 parents 0a37342 + e582020 commit 2b146ce

5 files changed

Lines changed: 138 additions & 4 deletions

File tree

assets/template/index.html

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -230,7 +230,7 @@ <h2>Resource Inventory ({{ total_resources }})</h2>
230230
{% for resource_type in resource_inventory %}
231231
<div class="col-md-3 col-sm-12 service-model-item" data-service-model="{{ resource_type.resource_type | trim }}">
232232
<div class="resource-box">
233-
<img src="{{ resource_type.icon | trim }}" alt="{{ resource_type.name | trim }}">
233+
<img src="assets/{{ resource_type.icon | trim }}" alt="{{ resource_type.name | trim }}">
234234
<h3>{{ resource_type.name | trim }}</h3>
235235
<h5>
236236
{{ resource_type.count }} Resource{% if resource_type.count != 1 %}s{% endif %} Available

core/utils_report.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -144,7 +144,7 @@ def generate_html_report(report_path, metadata, resource_type_mapping, resource_
144144
str(item["resource_type"]): {
145145
**item,
146146
"name": resource_type_mapping.get(str(item["resource_type"]), {}).get("name", "Unknown Resource"),
147-
"icon": resource_type_mapping.get(str(item["resource_type"]), {}).get("icon", "assets/icons/default.png")
147+
"icon": "/assets" + resource_type_mapping.get(str(item["resource_type"]), {}).get("icon", "/icons/default.png")
148148
}
149149
for item in resource_inventory
150150
}
@@ -342,7 +342,8 @@ def transform_resource_inventory_for_pdf(resource_inventory, resource_type_mappi
342342

343343
resource_name = resource_info.get("name", "Unknown Resource")
344344
# Construct icon_url from the resource_info, default if not found
345-
icon_path = resource_info.get("icon", "/assets/icons/default.png")
345+
icon_path = "/assets" + resource_info.get("icon", "/icons/default.png")
346+
346347
# Prepend report_storage to form the full path to the icon
347348
icon_url = f"{report_path}{icon_path}"
348349

@@ -439,7 +440,7 @@ def transform_alt_tech_for_pdf(resource_inventory, resource_type_mapping, altern
439440
rtype_info = resource_type_mapping.get(rtype_str, {})
440441
resource_name = rtype_info.get("name", "Unknown Resource")
441442

442-
icon_path = rtype_info.get("icon", "/assets/icons/default.png")
443+
icon_path = "/assets" + rtype_info.get("icon", "/icons/default.png")
443444
icon_url = f"{report_path}{icon_path}"
444445

445446
count = alt_counts.get(rtype_str, 0)

datasets/data.db

-688 KB
Binary file not shown.

main.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
from utils.constants import REGION_CHOICES, REQUIRED_FIELDS_AZURE, REQUIRED_FIELDS_AWS
2828
from utils.validate import validate_region, validate_config
2929
from utils.azure import select_subscription, select_resource_group, is_azure_cli_logged_in
30+
from utils.data import initialize_dataset
3031

3132
# Configure the root logger to ensure logs propagate from all modules
3233
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
@@ -417,6 +418,9 @@ def main():
417418
# Print ASCII art
418419
console.print(ascii_art, style="bold cyan")
419420

421+
# Ensure latest dataset is available before proceeding
422+
initialize_dataset()
423+
420424
args = parse_arguments()
421425

422426
# Check if the cloud provider is specified

utils/data.py

Lines changed: 129 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,129 @@
1+
import os
2+
import gzip
3+
import shutil
4+
import hashlib
5+
import time
6+
import requests
7+
from datetime import datetime, timedelta
8+
from pathlib import Path
9+
from requests.exceptions import RequestException, ConnectionError, Timeout
10+
11+
# Constants
12+
DATASET_FOLDER = Path("datasets")
13+
REMOTE_STORAGE_URL = "https://cloudexit-oss-data-eu.fsn1.your-objectstorage.com"
14+
15+
def get_monday_date(now=None):
    """Return the remote filename of the most recent published weekly dataset.

    Dataset archives are date-stamped by the Monday of their week
    ("cloudexit-YYYY-MM-DD.db.gz").  On a Monday before 08:00 UTC the
    current week's archive is presumably not published yet, so the previous
    week's name is returned instead.

    Args:
        now: Optional datetime to use as "current time".  Defaults to the
            current UTC time; exposed as a parameter mainly for testability.

    Returns:
        The remote object name, e.g. "cloudexit-2024-01-08.db.gz".
    """
    if now is None:
        now = datetime.utcnow()
    # Roll back to the Monday of the current week (weekday(): Monday == 0).
    monday = now - timedelta(days=now.weekday())
    # Before the Monday 08:00 UTC cutoff, fall back to last week's file.
    if now.weekday() == 0 and now.hour < 8:
        monday -= timedelta(days=7)
    return monday.strftime("cloudexit-%Y-%m-%d.db.gz")
24+
25+
def compute_file_hash(filepath):
    """Return the hexadecimal SHA-256 digest of the file at *filepath*."""
    digest = hashlib.sha256()
    with open(filepath, "rb") as fh:
        # Feed the file through in fixed-size chunks to keep memory flat
        # even for large archives.
        while chunk := fh.read(4096):
            digest.update(chunk)
    return digest.hexdigest()
31+
32+
def download_file(url, destination, retries=3, delay=5):
    """Download *url* to *destination*, retrying transient network failures.

    Args:
        url: Remote URL to fetch.
        destination: Local path to write the downloaded bytes to.
        retries: Maximum number of attempts for transient failures.
        delay: Seconds to sleep between attempts.

    Returns:
        True on success, False when every attempt failed.
    """
    for attempt in range(retries):
        try:
            response = requests.get(url, stream=True, timeout=30)
            response.raise_for_status()

            with open(destination, "wb") as f:
                # iter_content honours any Content-Encoding the server
                # applied; reading response.raw would hand back the
                # still-encoded bytes.
                for chunk in response.iter_content(chunk_size=8192):
                    f.write(chunk)

            print(f"[INFO] Download successful: {destination}")
            return True

        except ConnectionError:
            print(f"[ERROR] Connection failed while downloading {url}. Retrying ({attempt + 1}/{retries})...")
        except Timeout:
            print(f"[ERROR] Request timed out while downloading {url}. Retrying ({attempt + 1}/{retries})...")
        except RequestException as e:
            # Non-transient HTTP/protocol error: give up immediately.
            print(f"[ERROR] Failed to download {url}: {e}")
            break

        # A failed attempt may have left a truncated file behind; remove it
        # so later checksum comparisons never see partial data.
        if os.path.exists(destination):
            os.remove(destination)
        # Back off before retrying, but not after the final attempt.
        if attempt < retries - 1:
            time.sleep(delay)

    print(f"[ERROR] Unable to download file after {retries} attempts: {url}")
    return False
56+
57+
def fetch_remote_checksum(checksum_url, retries=3, delay=5):
    """Fetch a SHA-256 checksum published at *checksum_url*.

    The remote file is expected to be in "sha256sum" format
    ("<hex digest>  <filename>"); only the first whitespace-separated
    token is returned.

    Args:
        checksum_url: URL of the .sha256 file.
        retries: Maximum number of attempts for transient failures.
        delay: Seconds to sleep between attempts.

    Returns:
        The hex digest string, or None when it cannot be retrieved.
    """
    for attempt in range(retries):
        try:
            response = requests.get(checksum_url, timeout=10)
            response.raise_for_status()
            tokens = response.text.strip().split()
            if tokens:
                return tokens[0]
            # An empty body previously raised an unhandled IndexError;
            # treat it as a failed attempt instead.
            print(f"[ERROR] Empty checksum file at {checksum_url}. Retrying ({attempt + 1}/{retries})...")

        except ConnectionError:
            print(f"[ERROR] Connection failed when fetching {checksum_url}. Retrying ({attempt + 1}/{retries})...")
        except Timeout:
            print(f"[ERROR] Request timed out when fetching {checksum_url}. Retrying ({attempt + 1}/{retries})...")
        except RequestException as e:
            # Non-transient HTTP/protocol error: give up immediately.
            print(f"[ERROR] Failed to fetch {checksum_url}: {e}")
            break

        # Back off before retrying, but not after the final attempt.
        if attempt < retries - 1:
            time.sleep(delay)

    print(f"[ERROR] Unable to fetch remote checksum after {retries} attempts.")
    return None
76+
77+
def initialize_dataset():
    """Ensure the local dataset (datasets/data.db) matches the latest remote build.

    Looks up this week's date-stamped archive on remote storage, falling back
    to the rolling "cloudexit-latest" object when the dated checksum is not
    available.  The archive is downloaded to a temporary file and verified
    against the published SHA-256 checksum *before* any existing local data
    is removed, so a failed or corrupt download never destroys a working
    dataset.  Terminates the process when no dataset exists at all.

    Raises:
        SystemExit: if the dataset folder is empty after the update attempt.
    """
    DATASET_FOLDER.mkdir(exist_ok=True)

    latest_file = get_monday_date()
    latest_file_url = f"{REMOTE_STORAGE_URL}/{latest_file}"
    latest_checksum_url = f"{REMOTE_STORAGE_URL}/{latest_file}.sha256"
    latest_symlink_file = f"{REMOTE_STORAGE_URL}/cloudexit-latest.db.gz"
    latest_symlink_checksum_url = f"{REMOTE_STORAGE_URL}/cloudexit-latest.db.gz.sha256"

    local_db_path = DATASET_FOLDER / "data.db"
    local_compressed_path = DATASET_FOLDER / latest_file

    # Fetch checksum for the date-based file; fall back to the rolling
    # "latest" object when this week's archive is not published yet.
    remote_checksum = fetch_remote_checksum(latest_checksum_url)
    if not remote_checksum:
        print(f"[INFO] Unable to fetch remote checksum from {latest_checksum_url}.")
        print(f"[INFO] Trying latest symlink from {latest_symlink_checksum_url}...")
        remote_checksum = fetch_remote_checksum(latest_symlink_checksum_url)
        latest_file_url = latest_symlink_file
        latest_file = "cloudexit-latest.db.gz"
        local_compressed_path = DATASET_FOLDER / latest_file

    if not remote_checksum:
        print("[ERROR] Unable to fetch any remote checksum. Skipping update.")
    else:
        # Already current? The compressed archive on disk hashing to the
        # remote checksum means data.db was extracted from it previously.
        if local_compressed_path.exists() and compute_file_hash(local_compressed_path) == remote_checksum:
            print("[INFO] Local dataset is up-to-date. No download needed.")
            return

        print("[INFO] Local dataset is outdated. Removing old files and downloading new dataset...")

        # Download to a temporary name first: the previous implementation
        # deleted the working dataset before downloading, so a failed
        # download left the user with no data at all.
        tmp_path = local_compressed_path.with_suffix(".tmp")
        if download_file(latest_file_url, tmp_path):
            # Verify the download against the published checksum before
            # trusting it (the checksum was previously fetched but never
            # used to validate the downloaded bytes).
            if compute_file_hash(tmp_path) != remote_checksum:
                print("[ERROR] Downloaded file failed checksum verification. Keeping existing dataset.")
                os.remove(tmp_path)
            else:
                # Safe to drop stale artifacts now that a good archive exists.
                for old in DATASET_FOLDER.glob("cloudexit-*.db.gz"):
                    os.remove(old)
                os.replace(tmp_path, local_compressed_path)

                print(f"[INFO] Download successful. Extracting dataset from {latest_file}...")
                with gzip.open(local_compressed_path, "rb") as f_in, open(local_db_path, "wb") as f_out:
                    shutil.copyfileobj(f_in, f_out)

                print("[INFO] Dataset updated successfully.")

    # Hard failure: nothing local and nothing fetchable.
    if not any(DATASET_FOLDER.iterdir()):
        print("[ERROR] Dataset folder is empty! Cannot proceed without data.")
        raise SystemExit(1)

0 commit comments

Comments
 (0)