Skip to content

Commit fc89ada

Browse files
committed
pb de gpu
1 parent 136b88b commit fc89ada

2 files changed

Lines changed: 8 additions & 6 deletions

File tree

training/argo-workflows/train-workflow.yaml

Lines changed: 7 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -16,14 +16,14 @@ spec:
1616
value: '[
1717
{ "SOURCE": "SENTINEL2",
1818
"N_BANDS": "14",
19-
"DATASETS": ["AT332_2018","BE100_2018","BE251_2018","BG322_2018","CY000_2018","CZ072_2018","DEA54_2018","DK041_2018","EE00A_2018","EL521_2018","ES612_2018","FI1C1_2018","FRJ27_2018","FRK26_2018","HR050_2018","IE061_2018","ITI32_2018","LT028_2018","LU000_2018","LV008_2018","MT001_2018","NL33C_2018","PL414_2018","PT16I_2018","RO123_2018","SI035_2018","SK022_2018","UKJ22_2018","AT332_2021","BE100_2021","BE251_2021","BG322_2021","CY000_2021","CZ072_2021","DEA54_2021","DK041_2021","EE00A_2021","EL521_2021","ES612_2021","FI1C1_2021","FRJ27_2021","FRK26_2021","HR050_2021","IE061_2021","ITI32_2021","LT028_2021","LU000_2021","LV008_2021","MT001_2021","NL33C_2021","PL414_2021","PT16I_2021","RO123_2021","SI035_2021","SK022_2021"],
19+
"DATASETS": ["AT332_2018","BE100_2018","BE251_2018","BG322_2018","CY000_2018","CZ072_2018","DEA54_2018","DK041_2018","EE00A_2018","EL521_2018","ES612_2018","FI1C1_2018","FRJ27_2018","FRK26_2018","HR050_2018","IE061_2018","ITI32_2018","LT028_2018","LU000_2018","LV008_2018","MT001_2018","NL33C_2018","PL414_2018","PT16I_2018","RO123_2018","SI035_2018","SK022_2018","UKJ22_2018","AT332_2021","BE100_2021","BE251_2021","BG322_2021","CY000_2021","CZ072_2021","DEA54_2021","EE00A_2021","EL521_2021","ES612_2021","FI1C1_2021","FRJ27_2021","FRK26_2021","HR050_2021","IE061_2021","ITI32_2021","LT028_2021","LU000_2021","LV008_2021","MT001_2021","NL33C_2021","PL414_2021","PT16I_2021","RO123_2021","SI035_2021","SK022_2021"],
2020
"TILES_SIZE": 250,
2121
"AUGMENT_SIZE": 512,
2222
"TYPE_LABELER": "CLCplus-Backbone",
2323
"USE_S3": 0,
24-
"EPOCHS": 1,
25-
"BATCH_SIZE": 4,
26-
"TEST_BATCH_SIZE": 2,
24+
"EPOCHS": 50,
25+
"BATCH_SIZE": 8,
26+
"TEST_BATCH_SIZE": 4,
2727
"LR": 0.00005,
2828
"BUILDING_CLASS_WEIGHT": 10.0,
2929
"LOSS_NAME": "cross_entropy_weighted",
@@ -113,6 +113,8 @@ spec:
113113
- name: LOGITS
114114
- name: FREEZE_ENCODER
115115
- name: CUDA
116+
nodeSelector:
117+
gpu-vram: 80GB
116118
container:
117119
image: inseefrlab/onyxia-python-pytorch:py3.13.12-gpu
118120
imagePullPolicy: Always
@@ -183,4 +185,4 @@ spec:
183185
name: mlflow
184186
key: password
185187
- name: ENTRY_POINT
186-
value: main
188+
value: main

training/src/train.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -281,7 +281,7 @@ def main(
281281
random.seed(0)
282282
np.random.seed(0)
283283

284-
kwargs = {"num_workers": os.cpu_count(), "pin_memory": True} if args.cuda else {}
284+
kwargs = {"num_workers": 0, "pin_memory": False} if args.cuda else {}
285285

286286
earlystop = {"monitor": "val_loss", "patience": patience, "mode": "min"}
287287
checkpoints = [

0 commit comments

Comments
 (0)