# config.yaml

# Model configuration
model:
  # Sample size; same value as training.max_resolution (512).
  sample_size: 512
  in_channels: 3
  out_channels: 3
  center_input_sample: false
  time_embedding_type: 'positional'
  # Base channel count and its multipliers.
  # NOTE(review): presumably one multiplier per resolution level — verify
  # against the model code.
  model_channels: 192
  channel_mult:
    - 1
    - 2
    - 3
    - 4
  num_blocks: 3
  # NOTE(review): presumably the feature-map resolutions at which attention
  # is applied — TODO confirm.
  attn_resolutions:
    - 16
    - 8
  dropout: 0.1
  # Latent dimension for StyleGAN.
  # NOTE(review): stylegan.style_dim is also 512 — confirm which key the
  # code actually reads.
  latent_dim: 512
  # Number of MLP layers in the StyleGAN mapping network.
  # NOTE(review): stylegan.n_mlp repeats this value (8).
  n_mlp: 8

# Training configuration
training:
  # Train the generator every 5 steps.
  G_steps: 5
  # Discriminator learning rate.
  D_lr: 0.00005
  # Generator learning rate.
  G_lr: 0.0002
  # R1 weight. It is set to 1 here; for comparison, the original StyleGAN2
  # paper used an r1_weight of 10 for most of its experiments.
  r1_weight: 1
  output_dir: './speak_stylegan'
  num_epochs: 100
  train_batch_size: 1
  # NOTE(review): eval_batch_size (16) and val_batch_size (32, further down)
  # are both defined — confirm which one the evaluation loop reads.
  eval_batch_size: 16
  save_image_steps: 100
  gradient_accumulation_steps: 1
  mixed_precision: 'fp16'
  # Number of epochs before increasing resolution.
  # NOTE(review): with num_epochs: 100 this allows only two 50-epoch stages;
  # if resolution doubles per stage, going from 64 to 512 would need more
  # epochs — confirm the intended schedule.
  epochs_per_resolution: 50
  eval_steps: 1000
  logging_steps: 100
  num_workers: 8
  val_batch_size: 32
  early_stopping_patience: 10
  # Gradient clipping switch and its threshold value.
  grad_clip: true
  grad_clip_value: 1.0
  # Checkpoint cadence, expressed both in epochs and in steps.
  save_epochs: 1
  save_steps: 2500
  # Starting resolution for progressive growing.
  initial_resolution: 64
  # Maximum resolution for progressive growing.
  max_resolution: 512
  # Balance between IRFD loss and StyleGAN loss.
  label_balance: 0.5
  # Weight for StyleGAN loss.
  stylegan_loss_weight: 0.1
  # NOTE(review): presumably the gradient-penalty weight — TODO confirm.
  gp_weight: 10.0
# Dataset configuration
dataset:
  # NOTE(review): looks like a Hugging Face Hub dataset id — confirm.
  name: 'lansinuote/gen.1.celeba'
  # Selects the first 80% of the train split.
  # NOTE(review): val_split below reserves another 20% — confirm whether it
  # is taken from this 80% subset or from the remaining data.
  split: 'train[:80%]'
  # Initial image size; same value as training.initial_resolution (64).
  image_size: 64
  # 20% of data for validation.
  val_split: 0.2

# Optimization configuration
optimization:
  # NOTE(review): training.G_lr and training.D_lr are defined as well —
  # confirm whether this shared learning_rate is still used.
  learning_rate: 0.0001
  # NOTE(review): presumably Adam-style moment coefficients — TODO confirm.
  beta1: 0.5
  beta2: 0.999
  # Written with an explicit decimal point: YAML 1.1 resolvers (e.g. PyYAML)
  # load a bare `1e-8` as the string "1e-8" instead of a float.
  eps: 1.0e-8
  weight_decay: 0.01

# Logging and evaluation
logging:
  # Logging backend name.
  # NOTE(review): presumably forwarded to the experiment tracker — verify
  # which values the trainer accepts.
  log_with: 'tensorboard'
  # Project / run-group name used when logging.
  project_name: 'speak_stylegan_training'

# StyleGAN specific configuration
stylegan:
  # NOTE(review): same value as model.latent_dim (512) — confirm which key
  # the code reads and keep the two in sync.
  style_dim: 512
  # NOTE(review): same value as model.n_mlp (8).
  n_mlp: 8
  channel_multiplier: 2

# Per-component weights.
# NOTE(review): the loss section also defines landmark_weight,
# emotion_weight and identity_weight; emotion is 1 here but
# loss.emotion_weight is 0 — confirm which set the trainer uses.
weights:
  face_recognition: 1
  emotion: 1
  landmark: 1
  
# Loss-term weights.
loss:
  # NOTE(review): the meaning of alpha is not documented in this file —
  # TODO confirm.
  alpha: 0.1
  lpips_weight: 1.0
  landmark_weight: 1.0
  # Integer 0, unlike the float sibling weights.
  # NOTE(review): presumably disables the emotion term — confirm.
  emotion_weight: 0
  identity_weight: 1.0