Commit
·
411a72a
1
Parent(s):
b370f23
commit
Browse files
Sci_Fi_frame_inbetweening.py
CHANGED
|
@@ -79,12 +79,12 @@ def generate_video(
|
|
| 79 |
scheduler = CogVideoXDDIMScheduler.from_pretrained(pretrained_model_name_or_path, subfolder="scheduler")
|
| 80 |
|
| 81 |
# 2. Load the pre-trained EF_Net
|
| 82 |
-
|
| 83 |
ckpt = torch.load(EF_Net_model_path, map_location='cpu', weights_only=False)
|
| 84 |
EF_Net_state_dict = {}
|
| 85 |
for name, params in ckpt['state_dict'].items():
|
| 86 |
EF_Net_state_dict[name] = params
|
| 87 |
-
m, u =
|
| 88 |
print(f'[ Weights from pretrained EF-Net was loaded into EF-Net ] [M: {len(m)} | U: {len(u)}]')
|
| 89 |
|
| 90 |
# 3. Load the prompt (can be modified independently according to specific needs)
|
|
@@ -98,7 +98,7 @@ def generate_video(
|
|
| 98 |
text_encoder=text_encoder,
|
| 99 |
transformer=transformer,
|
| 100 |
vae=vae,
|
| 101 |
-
|
| 102 |
scheduler=scheduler,
|
| 103 |
)
|
| 104 |
pipe.scheduler = CogVideoXDDIMScheduler.from_config(pipe.scheduler.config, timestep_spacing="trailing")
|
|
|
|
| 79 |
scheduler = CogVideoXDDIMScheduler.from_pretrained(pretrained_model_name_or_path, subfolder="scheduler")
|
| 80 |
|
| 81 |
# 2. Load the pre-trained EF_Net
|
| 82 |
+
EF_Net_model = EF_Net(num_layers=4, downscale_coef=8, in_channels=2, num_attention_heads=48,).requires_grad_(False).eval()
|
| 83 |
ckpt = torch.load(EF_Net_model_path, map_location='cpu', weights_only=False)
|
| 84 |
EF_Net_state_dict = {}
|
| 85 |
for name, params in ckpt['state_dict'].items():
|
| 86 |
EF_Net_state_dict[name] = params
|
| 87 |
+
m, u = EF_Net_model.load_state_dict(EF_Net_state_dict, strict=False)
|
| 88 |
print(f'[ Weights from pretrained EF-Net was loaded into EF-Net ] [M: {len(m)} | U: {len(u)}]')
|
| 89 |
|
| 90 |
# 3. Load the prompt (can be modified independently according to specific needs)
|
|
|
|
| 98 |
text_encoder=text_encoder,
|
| 99 |
transformer=transformer,
|
| 100 |
vae=vae,
|
| 101 |
+
EF_Net_model=EF_Net_model,
|
| 102 |
scheduler=scheduler,
|
| 103 |
)
|
| 104 |
pipe.scheduler = CogVideoXDDIMScheduler.from_config(pipe.scheduler.config, timestep_spacing="trailing")
|
Sci_Fi_frame_inbetweening.sh
CHANGED
|
@@ -1,12 +1,13 @@
|
|
| 1 |
-
export CUDA_VISIBLE_DEVICES=
|
| 2 |
EVAL_DIR=/home/lhchen/Sci-Fi/example_input_pairs
|
| 3 |
MODEL_NAME=CogVideoX-5b-I2V
|
| 4 |
OUT_DIR=outputs
|
|
|
|
| 5 |
|
| 6 |
mkdir -p $OUT_DIR
|
| 7 |
for example_dir in $(ls -d $EVAL_DIR/*)
|
| 8 |
do
|
| 9 |
-
example_name=$(
|
| 10 |
echo $example_name
|
| 11 |
|
| 12 |
out_fn=$OUT_DIR/$example_name'.mp4'
|
|
|
|
| 1 |
+
export CUDA_VISIBLE_DEVICES=7
|
| 2 |
EVAL_DIR=/home/lhchen/Sci-Fi/example_input_pairs
|
| 3 |
MODEL_NAME=CogVideoX-5b-I2V
|
| 4 |
OUT_DIR=outputs
|
| 5 |
+
basename=eval_videos_dir
|
| 6 |
|
| 7 |
mkdir -p $OUT_DIR
|
| 8 |
for example_dir in $(ls -d $EVAL_DIR/*)
|
| 9 |
do
|
| 10 |
+
example_name=$(basename $example_dir)
|
| 11 |
echo $example_name
|
| 12 |
|
| 13 |
out_fn=$OUT_DIR/$example_name'.mp4'
|
Sci_Fi_inbetweening_pipeline.py
CHANGED
|
@@ -177,7 +177,7 @@ class CogVideoXEFNetInbetweeningPipeline(DiffusionPipeline, CogVideoXLoraLoaderM
|
|
| 177 |
text_encoder: T5EncoderModel,
|
| 178 |
vae: AutoencoderKLCogVideoX,
|
| 179 |
transformer: CogVideoXTransformer3DModel,
|
| 180 |
-
|
| 181 |
scheduler: CogVideoXDDIMScheduler,
|
| 182 |
):
|
| 183 |
super().__init__()
|
|
@@ -187,7 +187,7 @@ class CogVideoXEFNetInbetweeningPipeline(DiffusionPipeline, CogVideoXLoraLoaderM
|
|
| 187 |
text_encoder=text_encoder,
|
| 188 |
vae=vae,
|
| 189 |
transformer=transformer,
|
| 190 |
-
|
| 191 |
scheduler=scheduler,
|
| 192 |
)
|
| 193 |
self.vae_scale_factor_spatial = (
|
|
@@ -742,7 +742,7 @@ class CogVideoXEFNetInbetweeningPipeline(DiffusionPipeline, CogVideoXLoraLoaderM
|
|
| 742 |
EF_Net_states = []
|
| 743 |
if (EF_Net_guidance_start <= current_sampling_percent < EF_Net_guidance_end):
|
| 744 |
# extract EF_Net hidden state
|
| 745 |
-
EF_Net_states = self.
|
| 746 |
hidden_states=latent_image_input[:,:,0:16,:,:],
|
| 747 |
encoder_hidden_states=prompt_embeds,
|
| 748 |
image_rotary_emb=None,
|
|
|
|
| 177 |
text_encoder: T5EncoderModel,
|
| 178 |
vae: AutoencoderKLCogVideoX,
|
| 179 |
transformer: CogVideoXTransformer3DModel,
|
| 180 |
+
EF_Net_model: EF_Net,
|
| 181 |
scheduler: CogVideoXDDIMScheduler,
|
| 182 |
):
|
| 183 |
super().__init__()
|
|
|
|
| 187 |
text_encoder=text_encoder,
|
| 188 |
vae=vae,
|
| 189 |
transformer=transformer,
|
| 190 |
+
EF_Net_model=EF_Net_model,
|
| 191 |
scheduler=scheduler,
|
| 192 |
)
|
| 193 |
self.vae_scale_factor_spatial = (
|
|
|
|
| 742 |
EF_Net_states = []
|
| 743 |
if (EF_Net_guidance_start <= current_sampling_percent < EF_Net_guidance_end):
|
| 744 |
# extract EF_Net hidden state
|
| 745 |
+
EF_Net_states = self.EF_Net_model(
|
| 746 |
hidden_states=latent_image_input[:,:,0:16,:,:],
|
| 747 |
encoder_hidden_states=prompt_embeds,
|
| 748 |
image_rotary_emb=None,
|