LiuhanChen committed on
Commit
411a72a
·
1 Parent(s): b370f23
Sci_Fi_frame_inbetweening.py CHANGED
@@ -79,12 +79,12 @@ def generate_video(
79
  scheduler = CogVideoXDDIMScheduler.from_pretrained(pretrained_model_name_or_path, subfolder="scheduler")
80
 
81
  # 2. Load the pre-trained EF_Net
82
- EF_Net = EF_Net(num_layers=4, downscale_coef=8, in_channels=2, num_attention_heads=48,).requires_grad_(False).eval()
83
  ckpt = torch.load(EF_Net_model_path, map_location='cpu', weights_only=False)
84
  EF_Net_state_dict = {}
85
  for name, params in ckpt['state_dict'].items():
86
  EF_Net_state_dict[name] = params
87
- m, u = EF_Net.load_state_dict(EF_Net_state_dict, strict=False)
88
  print(f'[ Weights from pretrained EF-Net was loaded into EF-Net ] [M: {len(m)} | U: {len(u)}]')
89
 
90
  #3. Load the prompt (Can be modified independently according to specific needs.)
@@ -98,7 +98,7 @@ def generate_video(
98
  text_encoder=text_encoder,
99
  transformer=transformer,
100
  vae=vae,
101
- EF_Net=EF_Net,
102
  scheduler=scheduler,
103
  )
104
  pipe.scheduler = CogVideoXDDIMScheduler.from_config(pipe.scheduler.config, timestep_spacing="trailing")
 
79
  scheduler = CogVideoXDDIMScheduler.from_pretrained(pretrained_model_name_or_path, subfolder="scheduler")
80
 
81
  # 2. Load the pre-trained EF_Net
82
+ EF_Net_model = EF_Net(num_layers=4, downscale_coef=8, in_channels=2, num_attention_heads=48,).requires_grad_(False).eval()
83
  ckpt = torch.load(EF_Net_model_path, map_location='cpu', weights_only=False)
84
  EF_Net_state_dict = {}
85
  for name, params in ckpt['state_dict'].items():
86
  EF_Net_state_dict[name] = params
87
+ m, u = EF_Net_model.load_state_dict(EF_Net_state_dict, strict=False)
88
  print(f'[ Weights from pretrained EF-Net was loaded into EF-Net ] [M: {len(m)} | U: {len(u)}]')
89
 
90
  #3. Load the prompt (Can be modified independently according to specific needs.)
 
98
  text_encoder=text_encoder,
99
  transformer=transformer,
100
  vae=vae,
101
+ EF_Net_model=EF_Net_model,
102
  scheduler=scheduler,
103
  )
104
  pipe.scheduler = CogVideoXDDIMScheduler.from_config(pipe.scheduler.config, timestep_spacing="trailing")
Sci_Fi_frame_inbetweening.sh CHANGED
@@ -1,12 +1,13 @@
1
- export CUDA_VISIBLE_DEVICES=6
2
  EVAL_DIR=/home/lhchen/Sci-Fi/example_input_pairs
3
  MODEL_NAME=CogVideoX-5b-I2V
4
  OUT_DIR=outputs
 
5
 
6
  mkdir -p $OUT_DIR
7
  for example_dir in $(ls -d $EVAL_DIR/*)
8
  do
9
- example_name=$(EVAL_DIR $example_dir)
10
  echo $example_name
11
 
12
  out_fn=$OUT_DIR/$example_name'.mp4'
 
1
+ export CUDA_VISIBLE_DEVICES=7
2
  EVAL_DIR=/home/lhchen/Sci-Fi/example_input_pairs
3
  MODEL_NAME=CogVideoX-5b-I2V
4
  OUT_DIR=outputs
5
+ basename=eval_videos_dir
6
 
7
  mkdir -p $OUT_DIR
8
  for example_dir in $(ls -d $EVAL_DIR/*)
9
  do
10
+ example_name=$(basename $example_dir)
11
  echo $example_name
12
 
13
  out_fn=$OUT_DIR/$example_name'.mp4'
Sci_Fi_inbetweening_pipeline.py CHANGED
@@ -177,7 +177,7 @@ class CogVideoXEFNetInbetweeningPipeline(DiffusionPipeline, CogVideoXLoraLoaderM
177
  text_encoder: T5EncoderModel,
178
  vae: AutoencoderKLCogVideoX,
179
  transformer: CogVideoXTransformer3DModel,
180
- EF_Net: EF_Net,
181
  scheduler: CogVideoXDDIMScheduler,
182
  ):
183
  super().__init__()
@@ -187,7 +187,7 @@ class CogVideoXEFNetInbetweeningPipeline(DiffusionPipeline, CogVideoXLoraLoaderM
187
  text_encoder=text_encoder,
188
  vae=vae,
189
  transformer=transformer,
190
- EF_Net=EF_Net,
191
  scheduler=scheduler,
192
  )
193
  self.vae_scale_factor_spatial = (
@@ -742,7 +742,7 @@ class CogVideoXEFNetInbetweeningPipeline(DiffusionPipeline, CogVideoXLoraLoaderM
742
  EF_Net_states = []
743
  if (EF_Net_guidance_start <= current_sampling_percent < EF_Net_guidance_end):
744
  # extract EF_Net hidden state
745
- EF_Net_states = self.EF_Net(
746
  hidden_states=latent_image_input[:,:,0:16,:,:],
747
  encoder_hidden_states=prompt_embeds,
748
  image_rotary_emb=None,
 
177
  text_encoder: T5EncoderModel,
178
  vae: AutoencoderKLCogVideoX,
179
  transformer: CogVideoXTransformer3DModel,
180
+ EF_Net_model: EF_Net,
181
  scheduler: CogVideoXDDIMScheduler,
182
  ):
183
  super().__init__()
 
187
  text_encoder=text_encoder,
188
  vae=vae,
189
  transformer=transformer,
190
+ EF_Net_model=EF_Net_model,
191
  scheduler=scheduler,
192
  )
193
  self.vae_scale_factor_spatial = (
 
742
  EF_Net_states = []
743
  if (EF_Net_guidance_start <= current_sampling_percent < EF_Net_guidance_end):
744
  # extract EF_Net hidden state
745
+ EF_Net_states = self.EF_Net_model(
746
  hidden_states=latent_image_input[:,:,0:16,:,:],
747
  encoder_hidden_states=prompt_embeds,
748
  image_rotary_emb=None,