This repository has been archived by the owner on Jun 19, 2024. It is now read-only.
forked from sony/soundctm
-
Notifications
You must be signed in to change notification settings - Fork 0
/
ctm_inference.sh
56 lines (51 loc) · 3.22 KB
/
ctm_inference.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
#!/usr/bin/env bash
# SoundCTM inference driver.
#
# Runs ctm_inference.py over the AudioCaps test split with a trained
# SoundCTM checkpoint.  Edit the /path/to/... placeholders below before use.
#
# The original script carried four commented-out copies of the same command
# that differed only in --num_steps / --nu; those are now environment
# overrides with the original defaults, e.g.:
#   NUM_STEPS=2  NU=1.  ./ctm_inference.sh
#   NUM_STEPS=8  NU=1.5 ./ctm_inference.sh
#   NUM_STEPS=16 NU=2.0 ./ctm_inference.sh
set -euo pipefail

export CUDA_VISIBLE_DEVICES=0

# Placeholder locations — replace before running.
ckpt_dir='/path/to/checkpoints/soundctm_ckpt/030000'
data_dir='/path/to/audiocaps'
out_dir='/path/to/output_dir/'

# Sampling knobs.  Defaults reproduce the original (uncommented) command;
# the commented-out sweeps used num_steps 2/4 with nu 1., 8 with 1.5,
# and 16 with 2.0.
num_steps="${NUM_STEPS:-1}"
nu="${NU:-1.}"

# NOTE(review): 'determinisitc' looks like a misspelling of 'deterministic',
# but it is kept verbatim because it may need to match the argparse choice
# string inside ctm_inference.py — confirm against that script before fixing.
python ctm_inference.py \
  --text_encoder_name "google/flan-t5-large" \
  --ctm_unet_model_config "configs/diffusion_model_config.json" \
  --training_args "${ckpt_dir}/summary.jsonl" \
  --model "${ckpt_dir}/pytorch_model.bin" \
  --ema_model "${ckpt_dir}/ema_0.999_030000.pt" \
  --test_file "${data_dir}/test.csv" \
  --sampler 'determinisitc' --sampling_gamma 0. --omega 3.5 \
  --num_steps "${num_steps}" --nu "${nu}" --num_samples 1 --batch_size 1 \
  --test_references "${data_dir}/test" \
  --output_dir "${out_dir}"