# SD-2-base-256.yaml
# Training configuration for Stable Diffusion 2 at a 256 resize size.
# Run-level settings. `project` and `name` are placeholders that must be
# filled in; they are referenced elsewhere in this file as ${project} and
# ${name}.
project: # Insert wandb project name
name: # Insert wandb run name
# Seed value, referenced elsewhere in this file as ${seed}.
seed: 17
# NOTE(review): presumably controls whether an evaluation pass runs before
# training starts — confirm against the training script.
eval_first: false
# Algorithm settings, keyed by algorithm name. Both entries point at the
# `unet` attribute and use `amp_fp16` precision.
# NOTE(review): nesting rebuilt from a flattened copy of this file; each
# `attribute`/`precision` pair is assumed to belong to the key above it.
algorithms:
  low_precision_groupnorm:
    attribute: unet
    precision: amp_fp16
  low_precision_layernorm:
    attribute: unet
    precision: amp_fp16
# Model definition. `_target_` names the callable that builds the model;
# the remaining keys are presumably passed to it as keyword arguments —
# confirm against the instantiation code.
model:
  _target_: diffusion.models.models.stable_diffusion_2
  pretrained: false
  precomputed_latents: true
  encode_latents_in_fp16: true
  fsdp: true
  # NOTE(review): assumed to be a model argument holding a list of metric
  # objects; nesting rebuilt from a flattened copy of this file.
  val_metrics:
    - _target_: torchmetrics.MeanSquaredError
# Data settings: global batch sizes plus one dataloader definition each for
# training and evaluation. `remote` and `local` are placeholders to fill in.
# NOTE(review): nesting rebuilt from a flattened copy of this file; keys
# between `train_dataset` and `eval_dataset` are assumed to belong to the
# training dataloader, and the rest to the evaluation dataloader.
dataset:
  train_batch_size: 2048  # Global training batch size
  eval_batch_size: 1024  # Global evaluation batch size
  train_dataset:
    _target_: diffusion.datasets.laion.laion.build_streaming_laion_dataloader
    remote:  # Path to object store bucket(s)
    local:  # Path to corresponding local dataset(s)
    tokenizer_name_or_path: stabilityai/stable-diffusion-2-base
    caption_drop_prob: 0.1
    resize_size: 256
    drop_last: true
    shuffle: true
    prefetch_factor: 2
    num_workers: 8
    persistent_workers: true
    pin_memory: true
    download_timeout: 300
    num_canonical_nodes: 64
  eval_dataset:
    _target_: diffusion.datasets.coco.coco_captions.build_streaming_cocoval_dataloader
    remote:  # Path to object store bucket
    local:  # Path to local dataset cache
    resize_size: 256
    prefetch_factor: 2
    num_workers: 8
    # Booleans written in canonical lowercase, matching the rest of the file.
    persistent_workers: true
    pin_memory: true
# Optimizer definition: AdamW with the learning rate and weight decay below.
optimizer:
  _target_: torch.optim.AdamW
  lr: 1.0e-4
  weight_decay: 0.01
# Learning-rate scheduler definition. Duration strings ending in `ba` are
# Composer time strings measured in batches.
scheduler:
  _target_: composer.optim.LinearWithWarmupScheduler
  t_warmup: 10000ba
  alpha_f: 1.0
# Logger definitions, keyed by logger name. The wandb logger reuses the
# top-level `name` and `project` values through ${...} interpolation; the
# run name doubles as the group.
logger:
  wandb:
    _target_: composer.loggers.wandb_logger.WandBLogger
    name: ${name}
    project: ${project}
    group: ${name}
# Callback definitions, keyed by callback name. Each entry names its class
# via `_target_`; any other keys under an entry accompany that `_target_`.
# NOTE(review): nesting rebuilt from a flattened copy of this file;
# `window_size` is assumed to belong to `speed_monitor`.
callbacks:
  speed_monitor:
    _target_: composer.callbacks.speed_monitor.SpeedMonitor
    window_size: 10
  lr_monitor:
    _target_: composer.callbacks.lr_monitor.LRMonitor
  memory_monitor:
    _target_: composer.callbacks.memory_monitor.MemoryMonitor
  runtime_estimator:
    _target_: composer.callbacks.runtime_estimator.RuntimeEstimator
  optimizer_monitor:
    _target_: composer.callbacks.OptimizerMonitor
# Trainer definition. Durations and intervals ending in `ba` are Composer
# time strings measured in batches. `run_name` and `seed` reuse the
# top-level values through ${...} interpolation.
trainer:
  _target_: composer.Trainer
  device: gpu
  max_duration: 550000ba
  eval_interval: 10000ba
  device_train_microbatch_size: 16
  run_name: ${name}
  seed: ${seed}
  save_folder:  # Insert path to save folder or bucket
  save_interval: 10000ba
  save_overwrite: true
  autoresume: false
  # NOTE(review): assumed to be a trainer argument; nesting rebuilt from a
  # flattened copy of this file.
  fsdp_config:
    sharding_strategy: "SHARD_GRAD_OP"