Update skyreelsinfer/pipelines/pipeline_skyreels_video.py
skyreelsinfer/pipelines/pipeline_skyreels_video.py
@@ -14,7 +14,7 @@ from diffusers.pipelines.hunyuan_video.pipeline_hunyuan_video import MultiPipeli
 from diffusers.pipelines.hunyuan_video.pipeline_hunyuan_video import PipelineCallback
 from diffusers.pipelines.hunyuan_video.pipeline_hunyuan_video import retrieve_timesteps
 from PIL import Image
-
+import gc
 
 def resizecrop(image, th, tw):
     w, h = image.size
@@ -240,7 +240,7 @@ class SkyreelsVideoPipeline(HunyuanVideoPipeline):
             batch_size = len(prompt)
         else:
             batch_size = prompt_embeds.shape[0]
-        pipe.text_encoder.to("cuda")
+        #pipe.text_encoder.to("cuda")
 
         # 3. Encode input prompt
         (
@@ -339,8 +339,8 @@ class SkyreelsVideoPipeline(HunyuanVideoPipeline):
         if hasattr(self, "text_encoder_to_cpu"):
             self.text_encoder_to_cpu()
             pipe.text_encoder.to("cpu")
-        pipe.vae.to("cpu")
-        torch.cuda.empty_cache()
+        #pipe.vae.to("cpu")
+        #torch.cuda.empty_cache()
 
         with self.progress_bar(total=num_inference_steps) as progress_bar:
             for i, t in enumerate(timesteps):
@@ -414,7 +414,7 @@ class SkyreelsVideoPipeline(HunyuanVideoPipeline):
                 progress_bar.update()
 
         if not output_type == "latent":
-
+            # pipe.vae.to("cuda")
             latents = latents.to(self.vae.dtype) / self.vae.config.scaling_factor
             video = self.vae.decode(latents, return_dict=False)[0]
             video = self.video_processor.postprocess_video(video, output_type=output_type)
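
Note that the commit adds import gc, but none of the hunks shown here actually call it, so it is presumably used elsewhere in the file or intended for a later revision. The usual reason to import gc in this kind of pipeline is to pair gc.collect() with torch.cuda.empty_cache() so that GPU memory is released between stages. A minimal sketch of that common pattern, assuming plain PyTorch (the free_gpu_memory helper name is illustrative, not part of this file):

import gc

import torch


def free_gpu_memory() -> None:
    # Collect unreachable Python objects first so the CUDA tensors they
    # hold are actually deallocated rather than merely unreferenced.
    gc.collect()
    if torch.cuda.is_available():
        # Return cached allocator blocks to the driver so the memory
        # becomes available to other processes or later stages.
        torch.cuda.empty_cache()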
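
The commented-out pipe.text_encoder.to("cuda"), pipe.vae.to("cpu"), and # pipe.vae.to("cuda") lines were manual device shuttling: moving each sub-model onto the GPU only while it is needed. In diffusers, the built-in replacement for this hand-rolled approach is enable_model_cpu_offload(), which requires the accelerate package. A hedged sketch under that assumption (the checkpoint id below is illustrative, not the actual SkyReels weights):

import torch
from diffusers import HunyuanVideoPipeline

# Illustrative checkpoint id; substitute the real SkyReels checkpoint.
pipe = HunyuanVideoPipeline.from_pretrained(
    "hunyuanvideo-community/HunyuanVideo", torch_dtype=torch.float16
)
# Moves each component (text encoder, transformer, VAE) to the GPU only
# for its forward pass and back to the CPU afterwards, which is the same
# goal the manual .to("cuda") / .to("cpu") calls in the diff pursued.
pipe.enable_model_cpu_offload()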