Skip to content

Commit 0e5eaef

Browse files
Merge pull request #9 from hertie-data-science-lab/giorgio-dev
Merge GC notebooks in main
2 parents 4417425 + dcdaa81 commit 0e5eaef

12 files changed

Lines changed: 961 additions & 160 deletions

data/.DS_Store

6 KB
Binary file not shown.

data/generate_pv_masks.py

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
import numpy as np
2+
from PIL import Image
3+
from pathlib import Path
4+
5+
def extract_pv_masks(
    src_dir: Path,
    dst_dir: Path,
    pv_class: int = 0,
    background_value: int = 255,
) -> int:
    """
    Extract masks that contain PV modules and save a cleaned PV-only mask.

    Every ``*.png`` file directly inside ``src_dir`` is loaded. Masks in which
    no pixel equals ``pv_class`` are skipped. For the remaining masks, pixels
    equal to ``pv_class`` keep that value and every other pixel is set to
    ``background_value``; the result is cast to ``uint8`` and written to
    ``dst_dir`` under the original file name.

    Parameters
    ----------
    src_dir : Path
        Directory containing original RID superstructure masks.
    dst_dir : Path
        Output directory for pv-only masks (created if it does not exist).
    pv_class : int, optional
        Class index representing PV modules (default: 0).
    background_value : int, optional
        Value used for non-PV pixels in the output mask (default: 255 for easy visualization).

    Returns
    -------
    int
        Number of PV-only masks written to ``dst_dir``.
    """
    dst_dir.mkdir(parents=True, exist_ok=True)

    # glob() yields files in arbitrary order; sort for reproducible runs.
    mask_files = sorted(src_dir.glob("*.png"))

    print(f"Found {len(mask_files)} masks to inspect...")
    count_saved = 0

    for mask_path in mask_files:
        # The context manager releases the file handle once pixels are copied.
        with Image.open(mask_path) as img:
            mask = np.array(img)

        # Boolean map of PV pixels, reused for the check and the output mask.
        is_pv = mask == pv_class
        if not is_pv.any():
            continue  # skip masks without PV

        # Create PV-only mask
        pv_mask = np.where(is_pv, pv_class, background_value).astype(np.uint8)

        # Save to new directory
        Image.fromarray(pv_mask).save(dst_dir / mask_path.name)
        count_saved += 1

    print(f"Saved {count_saved} PV-only masks to: {dst_dir}")
    return count_saved
48+
49+
if __name__ == "__main__":
    # Resolve both folders relative to this script so the current working
    # directory does not matter.
    BASE_DIR = Path(__file__).resolve().parent
    SRC_MASK_DIR = BASE_DIR / "masks_superstructures_reviewed"
    DST_PV_MASK_DIR = BASE_DIR / "masks_pv_modules_only"

    extract_pv_masks(
        src_dir=SRC_MASK_DIR,
        dst_dir=DST_PV_MASK_DIR,
        pv_class=0,
        background_value=255,  # white background for visualization
    )

data/load_rid_data.py

Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
#!/usr/bin/env python3
2+
"""
3+
Minimal setup script for the RID dataset.
4+
5+
Place this script in your `data/` directory and run:
6+
7+
python load_rid_data.py
8+
9+
It will:
10+
- rsync ONLY:
11+
- images_roof_centered_geotiff
12+
- masks_superstructures_reviewed
13+
from the TUM server
14+
- download them directly into THIS folder (data/)
15+
- skip downloading if the folder already exists and is non-empty
16+
"""
17+
18+
import subprocess
19+
from pathlib import Path
20+
21+
# rsync endpoint of the RID dataset on the TUM server (user name is part of the URL).
SERVER = "rsync://m1655470@dataserv.ub.tum.de/m1655470"
BASE_DIR = Path(__file__).resolve().parent  # data/
DATA_DIR = BASE_DIR

# Remote folders to fetch; each one is mirrored into DATA_DIR under the same name.
FOLDERS = [
    "images_roof_centered_geotiff",
    "masks_superstructures_reviewed",
]
29+
30+
31+
def folder_has_data(path: Path) -> bool:
    """Return True if `path` is a directory containing at least one entry.

    Any entry counts (file or subdirectory). A missing path, an empty
    directory, or a path that points to a regular file all return False.
    """
    # is_dir() rather than exists(): iterdir() raises on a regular file.
    return path.is_dir() and any(path.iterdir())
34+
35+
36+
def rsync_folder(remote_name: str) -> None:
    """Rsync a single folder from the server into DATA_DIR.

    The download is skipped when the destination folder already exists and
    is non-empty. Otherwise the destination is created and ``rsync`` is run.

    Parameters
    ----------
    remote_name : str
        Folder name on the server; also used as the local folder name
        below ``DATA_DIR``.

    Raises
    ------
    SystemExit
        If the ``rsync`` executable cannot be found or exits with a
        non-zero status.
    """
    dst = DATA_DIR / remote_name

    # Skip if already present and non-empty.
    # NOTE(review): a download that failed part-way also leaves a non-empty
    # folder and will be skipped on the next run; delete the folder to retry.
    if folder_has_data(dst):
        print(f"✔ Skipping '{remote_name}' — folder already exists and is not empty.")
        return

    # Ensure directory exists
    dst.mkdir(parents=True, exist_ok=True)

    # Trailing slash on the source makes rsync copy the folder's contents
    # into dst instead of creating a nested folder.
    src = f"{SERVER}/{remote_name}/"
    cmd = [
        "rsync",
        "-av",
        "--progress",
        src,
        str(dst) + "/",  # ensure trailing slash
    ]

    print(f">>> Downloading '{remote_name}' from server...")
    print("   ", " ".join(cmd))
    print(">>> You may be asked for the password (m1655470).")

    try:
        subprocess.run(cmd, check=True)
    except FileNotFoundError as err:
        # Raised when the rsync executable itself is missing.
        raise SystemExit("ERROR: rsync not found. Please install rsync and try again.") from err
    except subprocess.CalledProcessError as err:
        raise SystemExit(
            f"ERROR: rsync for {remote_name} failed with exit code {err.returncode}."
        ) from err
67+
68+
69+
def main() -> None:
    """Download every folder listed in FOLDERS, then print the local paths."""
    print("=== Minimal RID dataset setup (only 2 folders, with existence check) ===")
    print(f"Data directory: {DATA_DIR}")
    print()

    for folder in FOLDERS:
        rsync_folder(folder)
        print()  # blank line between folders

    print("=== Done! ===")
    print("Folders now present:")
    for folder in FOLDERS:
        print("  •", DATA_DIR / folder)
82+
83+
84+
# Run the download only when executed as a script, not on import.
if __name__ == "__main__":
    main()

notebooks/meta_training_loss.png

2.34 KB
Loading

notebooks/predicted_masks.png

2.34 KB
Loading

notebooks/rid_roof_segments.ipynb

Lines changed: 130 additions & 0 deletions
Large diffs are not rendered by default.

notebooks/try_few_shot.ipynb

Lines changed: 317 additions & 125 deletions
Large diffs are not rendered by default.

notebooks/tutorial_few_shot_learning.ipynb

Lines changed: 191 additions & 34 deletions
Large diffs are not rendered by default.
4.4 MB
Loading
Lines changed: 163 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,163 @@
1+
---
2+
title: "Few-Shot Learning for Rooftop Detection in Satellite Imagery"
3+
subtitle: "Deep Learning Tutorial"
4+
author: "Giorgio Coppala, Nadine Daum, Elena Dreyer, Nico Reichardt"
5+
bibliography: refs.bib
6+
7+
8+
resources:
9+
- img/**
10+
11+
format:
12+
revealjs:
13+
theme: dimmery.scss
14+
slide-number: true
15+
default-image-width: 70%
16+
preview-links: auto
17+
logo: ""
18+
footer: ""
19+
transition: slide
20+
background-transition: fade
21+
self-contained: true
22+
html-math-method:
23+
method: mathjax
24+
url: https://cdn.jsdelivr.net/npm/mathjax@4/tex-mml-chtml.js
25+
include-in-header: include.html
26+
resources:
27+
- img/**
28+
---
29+
30+
31+
## Policy Relevance
32+
33+
- Many public authorities face the problem of **limited labeled data**
34+
(annotation is expensive, slow, or requires domain expertise)
35+
36+
- **Applications:**
37+
- medical sector: **rare disease detection**
38+
- emergency management: **flood extent mapping**
39+
- climate & energy: **solar PV rooftop assessment**
40+
- urban planning: **building footprints & infrastructure mapping**
41+
42+
- **Few-shot learning (FSL)** can help:
43+
- Learns to **generalize** from *1–5 labeled support examples per class*
44+
- (in our case) learns a **feature embedding** and constructs **class prototypes**
45+
- Enables segmentation in a **new city** with *minimal additional annotation*
46+
47+
48+
49+
## Problem Setting
50+
51+
::: {.columns}
52+
53+
::: {.column width="55%"}
54+
55+
- Goal of the tutorial: apply **Prototypical Networks** to
56+
rooftop segmentation using only a few labeled tiles
57+
58+
- **Few-shot segmentation** allows the model to learn characteristic
59+
rooftop shapes and textures from a small Geneva subset
60+
61+
- Demonstrates how rooftop maps can be produced for solar potential estimation in a **new geographic setting** with limited labels
62+
63+
:::
64+
65+
::: {.column width="45%"}
66+
![](figures/picture_use_case.png){width="100%" style="margin-top: 1rem;"}
67+
68+
<div style="font-size: 0.75rem; color:#666; text-align:center; margin-top:0.2rem;">
69+
Demonstration use case (self-made visualization)
70+
</div>
71+
:::
72+
73+
:::
74+
75+
76+
## Dataset: [Roofs of Geneva](https://huggingface.co/datasets/raphaelattias/overfitteam-geneva-satellite-images)
77+
78+
- **Size**: 1,050 labeled image-mask pairs
79+
80+
- **Task**: Binary segmentation masks (rooftop vs background)
81+
82+
- **Geographic splits**: 3 grids/neighborhoods (North, Center, South)
83+
84+
- **Image size**: 250x250 pixels
85+
86+
- **Categories**: Industrial, Residential
87+
88+
89+
## Inside the dataset
90+
91+
<div style="text-align:center;">
92+
![](figures/grids_animation.gif){width="50%"}
93+
</div>
94+
95+
<div style="font-size:0.75rem; text-align:center; color:#666; margin-top:0.5rem;">
96+
Geneva Animation: raw image → overlay rooftop → binary mask
97+
</div>
98+
99+
100+
## Discussion
101+
102+
**Room for improvement:**
103+
104+
- Fine-tune / tweak model parameters
105+
- Add regularization
106+
- Increase number of epochs
107+
108+
- Implement rough approximation of solar potential
109+
- e.g. based on IoU over roof area
110+
111+
112+
**Open for discussion:**
113+
114+
- Try a different encoder?
115+
- e.g. ResNet-50
116+
117+
- Change train / test split strategy?
118+
- e.g. random shuffle regardless of geographic regions
119+
120+
121+
122+
<div style="text-align:center; margin-top:3.5em; font-size:1.1em;">
123+
<a href="https://github.com/hertie-data-science-lab/tutorial-new-tutorial-group-1/tree/main"
124+
target="_blank"
125+
style="text-decoration:none;">
126+
GitHub Repo
127+
</a>
128+
</div>
129+
130+
131+
132+
## References
133+
134+
::: {.refs-super-small}
135+
136+
- **Alsentzer, E., Li, M. M., Kobren, S. N., Noori, A., Undiagnosed Diseases Network, Kohane, I. S., & Zitnik, M.** (2025). Few shot learning for phenotype-driven diagnosis of patients with rare genetic diseases. *npj Digital Medicine, 8*(1), 380. https://doi.org/10.1038/s41746-025-01749-1
137+
138+
- **Castello, R., Walch, A., Attias, R., Cadei, R., Jiang, S., & Scartezzini, J.-L.** (2021). Quantification of the suitable rooftop area for solar panel installation from overhead imagery using convolutional neural networks. *Journal of Physics: Conference Series, 2042*(1), 012002. https://doi.org/10.1088/1742-6596/2042/1/012002
139+
140+
- **Chen, Y., Wei, C., Wang, D., Ji, C., & Li, B.** (2022). Semi-supervised contrastive learning for few-shot segmentation of remote sensing images. *Remote Sensing, 14*(17), 4254. https://doi.org/10.3390/rs14174254
141+
142+
- **Ding, H., Zhang, H., & Jiang, X.** (2022). Self-regularized prototypical network for few-shot semantic segmentation. *Pattern Recognition, 132*, 109018. https://doi.org/10.1016/j.patcog.2022.109018
143+
144+
- **Finn, C., Abbeel, P., & Levine, S.** (2017). Model-agnostic meta-learning for fast adaptation of deep networks. In *International Conference on Machine Learning* (pp. 1126–1135). https://doi.org/10.48550/arXiv.1703.03400
145+
146+
- **Ge, Z., Fan, X., Zhang, J., & Jin, S.** (2025). SegPPD-FS: Segmenting plant pests and diseases in the wild using few-shot learning. *Plant Phenomics*, 100121. https://doi.org/10.1016/j.plaphe.2025.100121
147+
148+
- **Hu, Y., Liu, C., Li, Z., Xu, J., Han, Z., & Guo, J.** (2022). Few-shot building footprint shape classification with relation network. *ISPRS International Journal of Geo-Information, 11*(5), 311. https://doi.org/10.3390/ijgi11050311
149+
150+
- **Jadon, S.** (2021). COVID-19 detection from scarce chest X-ray image data using few-shot deep learning. In *Medical Imaging 2021* (pp. 161–170). https://doi.org/10.1117/12.2581496
151+
152+
- **Lee, G. Y., Dam, T., Ferdaus, M. M., Poenar, D. P., & Duong, V.** (2025). Enhancing Few-Shot Classification of Benchmark and Disaster Imagery with ATTBHFA-Net. *arXiv preprint* arXiv:2510.18326. https://doi.org/10.48550/arXiv.2510.18326
153+
154+
- **Li, X., He, Z., Zhang, L., Guo, S., Hu, B., & Guo, K.** (2025). CDCNet: Cross-domain few-shot learning with adaptive representation enhancement. *Pattern Recognition, 162*, 111382. https://doi.org/10.1016/j.patcog.2025.111382
155+
156+
- **Puthumanaillam, G., & Verma, U.** (2023). Texture based prototypical network for few-shot semantic segmentation of forest cover: Generalizing for different geographical regions. *Neurocomputing, 538*, 126201. https://doi.org/10.1016/j.neucom.2023.03.062
157+
158+
- **Snell, J., Swersky, K., & Zemel, R.** (2017). Prototypical networks for few-shot learning. *Advances in Neural Information Processing Systems, 30*. https://doi.org/10.48550/arXiv.1703.05175
159+
160+
- **Sung, F., Yang, Y., Zhang, L., Xiang, T., Torr, P. H., & Hospedales, T. M.** (2018). Learning to compare: Relation network for few-shot learning. In *CVPR* (pp. 1199–1208). https://doi.org/10.1109/CVPR.2018.00131
161+
:::
162+
163+

0 commit comments

Comments
 (0)