-
Notifications
You must be signed in to change notification settings - Fork 1
/
run.sh
139 lines (124 loc) · 4.62 KB
/
run.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
#!/usr/bin/env sh
# Shared configuration for the training stages launched further down.
# Abort on the first failing command (-e) and on any unset variable (-u);
# pipefail is not enabled.
set -e -u

# --- Launcher settings: taken from the environment when set and non-empty ---
: "${GPU:=0,1}"    # passed to the launcher as CUDA_VISIBLE_DEVICES
: "${PORT:=29500}" # passed to the launcher as MASTER_PORT
: "${N_GPUS:=2}"   # passed to torchrun as --nproc_per_node

# --- Locations ---
data_dir='./data'
chkps_dir='./models'

# --- Model ---
backbone='tf_efficientnetv2_l_in21k'
attn='scse'
loss='nrmse'

# --- Optimiser and schedule ---
OPTIM='adamw'
LR='0.001'
WD='0.01'
SCHEDULER='cosa'
MODE='epoch'
N_EPOCHS='800'
T_MAX='800'

# --- Data split and batching ---
BS='8'
FOLD='0'

# Output directory of the first run; the name encodes the settings above.
CHECKPOINT="${chkps_dir}/${backbone}_f${FOLD}_b${BS}x${N_GPUS}_e${N_EPOCHS}_${loss}_devscse_attnlin_augs_decplus7"
# Stage 1: initial training run (no --load); checkpoints go to ${CHECKPOINT}.
# The final argument must NOT end with a backslash: a trailing continuation
# would append the next line of the script to this command instead of
# executing it as its own statement.
# NOTE(review): the port is handed over only via the MASTER_PORT environment
# variable; confirm torchrun honors it (torchrun also has a --master_port flag).
MASTER_PORT="${PORT}" CUDA_VISIBLE_DEVICES="${GPU}" torchrun --nproc_per_node="${N_GPUS}" \
  ./src/train.py \
  --train-df "${data_dir}/features_metadata_FzP19JI.csv" \
  --train-images-dir "${data_dir}/train_features" \
  --train-labels-dir "${data_dir}/train_agbm" \
  --backbone "${backbone}" \
  --loss "${loss}" \
  --in-channels 15 \
  --optim "${OPTIM}" \
  --learning-rate "${LR}" \
  --weight-decay "${WD}" \
  --scheduler "${SCHEDULER}" \
  --T-max "${T_MAX}" \
  --num-epochs "${N_EPOCHS}" \
  --checkpoint-dir "${CHECKPOINT}" \
  --fold "${FOLD}" \
  --scheduler-mode "${MODE}" \
  --batch-size "${BS}" \
  --augs \
  --dec-attn-type "${attn}" \
  --dec-channels 384 368 352 336 320 \
  --fp16
# Stage 2: resume from the previous stage's last checkpoint with a lower
# learning rate and a shorter schedule.
LR=0.0001
N_EPOCHS=100
T_MAX=100
# Load from the directory the previous stage wrote to. It is captured before
# CHECKPOINT is reassigned; rebuilding the name from the new N_EPOCHS would
# point at an "_e100_..._decplus7" directory that no earlier stage creates.
CHECKPOINT_LOAD=${CHECKPOINT}
CHECKPOINT="${chkps_dir}/${backbone}_f${FOLD}_b${BS}x${N_GPUS}_e${N_EPOCHS}_${loss}_devscse_attnlin_augs_decplus7_plus800eb"
# The final argument must NOT end with a backslash, otherwise the next line
# of the script is appended to this command instead of being executed.
MASTER_PORT="${PORT}" CUDA_VISIBLE_DEVICES="${GPU}" torchrun --nproc_per_node="${N_GPUS}" \
  ./src/train.py \
  --train-df "${data_dir}/features_metadata_FzP19JI.csv" \
  --train-images-dir "${data_dir}/train_features" \
  --train-labels-dir "${data_dir}/train_agbm" \
  --backbone "${backbone}" \
  --loss "${loss}" \
  --in-channels 15 \
  --optim "${OPTIM}" \
  --learning-rate "${LR}" \
  --weight-decay "${WD}" \
  --scheduler "${SCHEDULER}" \
  --T-max "${T_MAX}" \
  --num-epochs "${N_EPOCHS}" \
  --checkpoint-dir "${CHECKPOINT}" \
  --fold "${FOLD}" \
  --scheduler-mode "${MODE}" \
  --batch-size "${BS}" \
  --load "${CHECKPOINT_LOAD}/model_last.pth" \
  --augs \
  --dec-attn-type "${attn}" \
  --dec-channels 384 368 352 336 320 \
  --fp16
# Stage 3: resume from the "_plus800eb" checkpoint and run with --ft
# (presumably a fine-tuning mode -- confirm against src/train.py).
N_EPOCHS=100
T_MAX=100
CHECKPOINT_LOAD="${chkps_dir}/${backbone}_f${FOLD}_b${BS}x${N_GPUS}_e${N_EPOCHS}_${loss}_devscse_attnlin_augs_decplus7_plus800eb"
CHECKPOINT="${chkps_dir}/${backbone}_f${FOLD}_b${BS}x${N_GPUS}_e${N_EPOCHS}_${loss}_devscse_attnlin_augs_decplus7_plus800eb_100ft"
# The final argument must NOT end with a backslash, otherwise the next line
# of the script is appended to this command instead of being executed.
MASTER_PORT="${PORT}" CUDA_VISIBLE_DEVICES="${GPU}" torchrun --nproc_per_node="${N_GPUS}" \
  ./src/train.py \
  --train-df "${data_dir}/features_metadata_FzP19JI.csv" \
  --train-images-dir "${data_dir}/train_features" \
  --train-labels-dir "${data_dir}/train_agbm" \
  --backbone "${backbone}" \
  --loss "${loss}" \
  --in-channels 15 \
  --optim "${OPTIM}" \
  --learning-rate "${LR}" \
  --weight-decay "${WD}" \
  --scheduler "${SCHEDULER}" \
  --T-max "${T_MAX}" \
  --num-epochs "${N_EPOCHS}" \
  --checkpoint-dir "${CHECKPOINT}" \
  --fold "${FOLD}" \
  --scheduler-mode "${MODE}" \
  --batch-size "${BS}" \
  --load "${CHECKPOINT_LOAD}/model_last.pth" \
  --augs \
  --dec-attn-type "${attn}" \
  --dec-channels 384 368 352 336 320 \
  --fp16 \
  --ft
# Stage 4: resume from the "_100ft" checkpoint and run another --ft pass
# (presumably a fine-tuning mode -- confirm against src/train.py); results go
# to the "_200ft" directory.
CHECKPOINT_LOAD="${chkps_dir}/${backbone}_f${FOLD}_b${BS}x${N_GPUS}_e${N_EPOCHS}_${loss}_devscse_attnlin_augs_decplus7_plus800eb_100ft"
CHECKPOINT="${chkps_dir}/${backbone}_f${FOLD}_b${BS}x${N_GPUS}_e${N_EPOCHS}_${loss}_devscse_attnlin_augs_decplus7_plus800eb_200ft"
# The final argument must NOT end with a backslash: a dangling continuation
# would pull any following line into this command.
MASTER_PORT="${PORT}" CUDA_VISIBLE_DEVICES="${GPU}" torchrun --nproc_per_node="${N_GPUS}" \
  ./src/train.py \
  --train-df "${data_dir}/features_metadata_FzP19JI.csv" \
  --train-images-dir "${data_dir}/train_features" \
  --train-labels-dir "${data_dir}/train_agbm" \
  --backbone "${backbone}" \
  --loss "${loss}" \
  --in-channels 15 \
  --optim "${OPTIM}" \
  --learning-rate "${LR}" \
  --weight-decay "${WD}" \
  --scheduler "${SCHEDULER}" \
  --T-max "${T_MAX}" \
  --num-epochs "${N_EPOCHS}" \
  --checkpoint-dir "${CHECKPOINT}" \
  --fold "${FOLD}" \
  --scheduler-mode "${MODE}" \
  --batch-size "${BS}" \
  --load "${CHECKPOINT_LOAD}/model_last.pth" \
  --augs \
  --dec-attn-type "${attn}" \
  --dec-channels 384 368 352 336 320 \
  --fp16 \
  --ft