Diffusers
Fudan-FUXI committed on
Commit
69516ef
·
verified ·
1 Parent(s): 190d073

Upload 2 files

Browse files
transformer/config.yaml ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model:
2
+ type: PixArtVideo_XL_1x2x2
3
+ space_scale: 0.5
4
+ time_scale: 1.0
5
+ mlp_type: "llama"
6
+ #enable_rope: True
7
+ position_embed_spaltial: "absolute"
8
+ position_embed_temporal: "rope"
9
+
10
+ norm_type: "llamarmsnorm"
11
+ in_channels: 8 # to be consistent with videovae
12
+ temp_window_size: [-1, 8, 8] # window attention for temporal attention
13
+ adain_with_text: True
14
+ qk_norm: False
15
+
16
+ prob_text_condition: 1.0
17
+ prob_img_condition: 0
18
+ prob_img_condition_attn: 0
19
+
20
+ class_dropout_prob: 0.1
21
+
22
+ grad_checkpointing: True
23
+
24
+ enable_frames_embedder: False
25
+ enable_tgt_size_embedder: False
26
+
27
+ clip_image_encoder: "pretrain_models/openai/clip-vit-large-patch14"
28
+
29
+ vae:
30
+ type: "CausualVAEVideo"
31
+ # z=8
32
+ config: "configs/vae_config.yaml"
33
+ from_pretrained: "./pretrain_model/vidgen/vae/vae_pytorch_model.bin"
34
+
35
+
36
+ text_encoder:
37
+ type: "t5"
38
+ from_pretrained: "pretrain_models/"
39
+ model_max_length: 200
40
+ shardformer: True
41
+
42
+
43
+ diffusion:
44
+ type: "IDDPM"
45
+ snr: False
46
+ train_sampling_steps: 1000
47
+ prob_self_condition: 0
48
+ v_predict: False
49
+
50
+
51
+ optimizer:
52
+ learning_rate: 1e-4
53
+ weight_decay: 0
54
+ eps: 1e-8
55
+ min_lr_ratio: 0.95
56
+ gradient_clip: 1.0
57
+
58
+ num_frames_video: 17 # base frames of one video slice
59
+ num_slice_for_long_video: -1 # number of 2s slices the long video is split into; -1 denotes dynamic
60
+
61
+ resolution_video: -1
62
+ resolution_image: -1
63
+ mode_various_resolution: False
64
+
65
+ precision: "bf16"
66
+ seed: 42
67
+ workers: 4
68
+ grad_checkpoint: False
69
+ gradient_accumulation_steps: 4
70
+ logging_steps: 10
transformer/diffusion_pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5df86cfb1fcaaeec882535ce381e99a6b9c98a59e82291c61ec5b25c76d7087b
3
+ size 3636367638