Update skyreelsinfer/pipelines/pipeline_skyreels_video.py
--- a/skyreelsinfer/pipelines/pipeline_skyreels_video.py
+++ b/skyreelsinfer/pipelines/pipeline_skyreels_video.py
@@ -14,7 +14,7 @@ from diffusers.pipelines.hunyuan_video.pipeline_hunyuan_video import MultiPipeli
 from diffusers.pipelines.hunyuan_video.pipeline_hunyuan_video import PipelineCallback
 from diffusers.pipelines.hunyuan_video.pipeline_hunyuan_video import retrieve_timesteps
 from PIL import Image
-
+import gc
 
 def resizecrop(image, th, tw):
     w, h = image.size
@@ -241,8 +241,11 @@ class SkyreelsVideoPipeline(HunyuanVideoPipeline):
         else:
             batch_size = prompt_embeds.shape[0]
         if self.text_encoder.device.type == 'cpu':
+            torch.cuda.empty_cache()
+            torch.cuda.reset_peak_memory_stats()
+            self.vae.to("cuda")
             self.text_encoder.to("cuda")
-
+        gc.collect()
         # 3. Encode input prompt
         (
             prompt_embeds,
@@ -313,6 +316,7 @@ class SkyreelsVideoPipeline(HunyuanVideoPipeline):
         )
 
         self.text_encoder.to("cpu")
+        gc.collect()
         torch.cuda.empty_cache()
         torch.cuda.reset_peak_memory_stats()
 
@@ -345,6 +349,7 @@ class SkyreelsVideoPipeline(HunyuanVideoPipeline):
         if hasattr(self, "text_encoder_to_cpu"):
             self.text_encoder_to_cpu()
         self.vae.to("cpu")
+        gc.collect()
         torch.cuda.empty_cache()
         torch.cuda.reset_peak_memory_stats()
         with self.progress_bar(total=num_inference_steps) as progress_bar:
@@ -420,7 +425,10 @@ class SkyreelsVideoPipeline(HunyuanVideoPipeline):
 
         if not output_type == "latent":
             if self.vae.device.type == 'cpu':
+                torch.cuda.empty_cache()
+                torch.cuda.reset_peak_memory_stats()
                 self.vae.to("cuda")
+            gc.collect()
             latents = latents.to(self.vae.dtype) / self.vae.config.scaling_factor
             video = self.vae.decode(latents, return_dict=False)[0]
             video = self.video_processor.postprocess_video(video, output_type=output_type)
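For context, every hunk applies the same CPU-to-GPU ping-pong: clear the CUDA caching allocator before onloading a component, and run gc.collect() plus torch.cuda.empty_cache() after offloading one, so the text encoder and VAE do not sit in VRAM during the denoising loop. A minimal sketch of that pattern, with hypothetical helper names onload/offload that are not part of the pipeline:

import gc

import torch


def onload(module: torch.nn.Module) -> None:
    """Bring an offloaded module onto the GPU (the pattern used in the diff
    before prompt encoding and before VAE decoding).

    Cached-but-unused allocator blocks are released first so the incoming
    weights fit, and the peak-memory counter is re-baselined so later
    readings reflect only the upcoming phase.
    """
    torch.cuda.empty_cache()
    torch.cuda.reset_peak_memory_stats()
    module.to("cuda")
    gc.collect()  # force a collection pass so unreferenced tensors are freed promptly


def offload(module: torch.nn.Module) -> None:
    """Park a module on the CPU (the pattern used in the diff after prompt
    encoding and before the denoising loop).

    Moving the weights off the GPU leaves their old allocations in PyTorch's
    caching allocator; gc.collect() drops lingering Python references and
    empty_cache() returns the freed blocks to the driver.
    """
    module.to("cpu")
    gc.collect()
    torch.cuda.empty_cache()
    torch.cuda.reset_peak_memory_stats()

In the commit this sequences as: onload the text encoder, encode the prompt, offload it for the denoising loop, then onload the VAE only for the final latent decode.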