From 7cdece06297abbf98a5b39dac54056881d295ced Mon Sep 17 00:00:00 2001 From: zhangxu44 Date: Wed, 2 Aug 2023 03:38:29 +0000 Subject: [PATCH] lvdm Delete redundant imports --- .../lvdm/_functional_video.py | 1 - .../lvdm/_transforms_video.py | 1 - .../text_to_video_lvdm/lvdm/lvdm_model.py | 1 - .../text_to_video_lvdm/lvdm/lvdm_trainer.py | 6 +--- .../scripts/lvdm_sample_short.py | 3 -- .../scripts/lvdm_sample_text2video.py | 3 -- ...ert_orig_lvdm_short_ckpt_to_ppdiffusers.py | 2 -- ...rig_lvdm_text2video_ckpt_to_ppdiffusers.py | 2 -- .../train_lvdm_text2video.py | 1 - .../models/lvdm_attention_temporal.py | 4 --- .../ppdiffusers/models/lvdm_unet_3d.py | 5 +--- ppdiffusers/ppdiffusers/models/lvdm_vae.py | 4 +-- .../ppdiffusers/pipelines/lvdm/__init__.py | 2 +- ...latent_video_diffusion_model_text2video.py | 4 +-- .../ppdiffusers/pipelines/lvdm/video_save.py | 30 ++----------------- 15 files changed, 7 insertions(+), 62 deletions(-) diff --git a/ppdiffusers/examples/text_to_video_lvdm/lvdm/_functional_video.py b/ppdiffusers/examples/text_to_video_lvdm/lvdm/_functional_video.py index 7e0ba19e889b11..514fd3893bac6e 100644 --- a/ppdiffusers/examples/text_to_video_lvdm/lvdm/_functional_video.py +++ b/ppdiffusers/examples/text_to_video_lvdm/lvdm/_functional_video.py @@ -1,5 +1,4 @@ import paddle -import warnings def _is_tensor_video_clip(clip): diff --git a/ppdiffusers/examples/text_to_video_lvdm/lvdm/_transforms_video.py b/ppdiffusers/examples/text_to_video_lvdm/lvdm/_transforms_video.py index 508dff4caba8f5..70361dee69ff9a 100644 --- a/ppdiffusers/examples/text_to_video_lvdm/lvdm/_transforms_video.py +++ b/ppdiffusers/examples/text_to_video_lvdm/lvdm/_transforms_video.py @@ -1,7 +1,6 @@ import paddle import numbers import random -import warnings from . import _functional_video as F diff --git a/ppdiffusers/examples/text_to_video_lvdm/lvdm/lvdm_model.py b/ppdiffusers/examples/text_to_video_lvdm/lvdm/lvdm_model.py index 8667901e8c1e1c..182e002bd57c19 100644 --- a/ppdiffusers/examples/text_to_video_lvdm/lvdm/lvdm_model.py +++ b/ppdiffusers/examples/text_to_video_lvdm/lvdm/lvdm_model.py @@ -22,7 +22,6 @@ import numpy as np import paddle import paddle.nn as nn -import paddle.nn.functional as F from paddlenlp.transformers import AutoTokenizer, CLIPTextModel from paddlenlp.utils.log import logger diff --git a/ppdiffusers/examples/text_to_video_lvdm/lvdm/lvdm_trainer.py b/ppdiffusers/examples/text_to_video_lvdm/lvdm/lvdm_trainer.py index 170cc10446f8ae..7abcaae538b832 100644 --- a/ppdiffusers/examples/text_to_video_lvdm/lvdm/lvdm_trainer.py +++ b/ppdiffusers/examples/text_to_video_lvdm/lvdm/lvdm_trainer.py @@ -13,7 +13,6 @@ # limitations under the License. import contextlib -import os import sys import paddle @@ -25,11 +24,8 @@ INTEGRATION_TO_CALLBACK, VisualDLCallback, rewrite_logs, ) -from paddlenlp.trainer.utils.helper import ( # nested_truncate, - distributed_concat, nested_concat, nested_detach, nested_numpify, - nested_truncate, ) +from paddlenlp.trainer.utils.helper import nested_detach from paddlenlp.utils.log import logger -from ppdiffusers.training_utils import unwrap_model from .frame_dataset import VideoFrameDataset from .webvid_dataset import WebVidDataset diff --git a/ppdiffusers/examples/text_to_video_lvdm/scripts/lvdm_sample_short.py b/ppdiffusers/examples/text_to_video_lvdm/scripts/lvdm_sample_short.py index 41ebe8d6c23166..50834b52ba707f 100644 --- a/ppdiffusers/examples/text_to_video_lvdm/scripts/lvdm_sample_short.py +++ b/ppdiffusers/examples/text_to_video_lvdm/scripts/lvdm_sample_short.py @@ -1,6 +1,3 @@ -import numpy as np -import os -from PIL import Image import paddle from ppdiffusers import LVDMUncondPipeline diff --git a/ppdiffusers/examples/text_to_video_lvdm/scripts/lvdm_sample_text2video.py b/ppdiffusers/examples/text_to_video_lvdm/scripts/lvdm_sample_text2video.py index 0bc730d997eae2..090d997205891a 100644 --- a/ppdiffusers/examples/text_to_video_lvdm/scripts/lvdm_sample_text2video.py +++ b/ppdiffusers/examples/text_to_video_lvdm/scripts/lvdm_sample_text2video.py @@ -1,6 +1,3 @@ -import numpy as np -import os -from PIL import Image import paddle from ppdiffusers import LVDMTextToVideoPipeline diff --git a/ppdiffusers/examples/text_to_video_lvdm/tools/convert_orig_lvdm_short_ckpt_to_ppdiffusers.py b/ppdiffusers/examples/text_to_video_lvdm/tools/convert_orig_lvdm_short_ckpt_to_ppdiffusers.py index 0f183d16787dd4..b13c52bb10a604 100644 --- a/ppdiffusers/examples/text_to_video_lvdm/tools/convert_orig_lvdm_short_ckpt_to_ppdiffusers.py +++ b/ppdiffusers/examples/text_to_video_lvdm/tools/convert_orig_lvdm_short_ckpt_to_ppdiffusers.py @@ -21,8 +21,6 @@ import paddle import torch -from paddlenlp.utils.downloader import get_path_from_url - try: from omegaconf import OmegaConf except ImportError: diff --git a/ppdiffusers/examples/text_to_video_lvdm/tools/convert_orig_lvdm_text2video_ckpt_to_ppdiffusers.py b/ppdiffusers/examples/text_to_video_lvdm/tools/convert_orig_lvdm_text2video_ckpt_to_ppdiffusers.py index 04ddacc92bbcf3..39ccdbc910869d 100644 --- a/ppdiffusers/examples/text_to_video_lvdm/tools/convert_orig_lvdm_text2video_ckpt_to_ppdiffusers.py +++ b/ppdiffusers/examples/text_to_video_lvdm/tools/convert_orig_lvdm_text2video_ckpt_to_ppdiffusers.py @@ -20,8 +20,6 @@ import paddle import torch -from paddlenlp.utils.downloader import get_path_from_url - try: from omegaconf import OmegaConf except ImportError: diff --git a/ppdiffusers/examples/text_to_video_lvdm/train_lvdm_text2video.py b/ppdiffusers/examples/text_to_video_lvdm/train_lvdm_text2video.py index ce036c2b2322c4..3ee460f5276199 100644 --- a/ppdiffusers/examples/text_to_video_lvdm/train_lvdm_text2video.py +++ b/ppdiffusers/examples/text_to_video_lvdm/train_lvdm_text2video.py @@ -19,7 +19,6 @@ from lvdm import ( LatentVideoDiffusion, LatentVideoDiffusionTrainer, - VideoFrameDataset, WebVidDataset, ) from lvdm.lvdm_args_text2video import WebVidDatasetArguments, TrainerArguments, ModelArguments diff --git a/ppdiffusers/ppdiffusers/models/lvdm_attention_temporal.py b/ppdiffusers/ppdiffusers/models/lvdm_attention_temporal.py index b85a9dc1ebe1a5..1950d6059a938d 100644 --- a/ppdiffusers/ppdiffusers/models/lvdm_attention_temporal.py +++ b/ppdiffusers/ppdiffusers/models/lvdm_attention_temporal.py @@ -9,7 +9,6 @@ _ppxformers_available = False import math -from typing import Optional, Any from einops import rearrange, repeat from .lvdm_util import GEGLU, exists, default, Normalize, zero_module, normalization, conv_nd from ..utils.initializer_utils import xavier_uniform_, constant_ @@ -189,9 +188,6 @@ def forward(self, x, context=None, mask=None): class MemoryEfficientCrossAttention(paddle.nn.Layer): - """https://github.com/MatthieuTPHR/diffusers/blob/d80b531ff8060ec1ea982b65a1b8df70f73aa67c/src/diffusers/models/attention.py#L223 - """ - def __init__(self, query_dim, context_dim=None, diff --git a/ppdiffusers/ppdiffusers/models/lvdm_unet_3d.py b/ppdiffusers/ppdiffusers/models/lvdm_unet_3d.py index fe7e74b8bd56dc..66987d5fa25f6f 100644 --- a/ppdiffusers/ppdiffusers/models/lvdm_unet_3d.py +++ b/ppdiffusers/ppdiffusers/models/lvdm_unet_3d.py @@ -2,10 +2,7 @@ from paddle.distributed.fleet.utils import recompute from abc import abstractmethod -import math from einops import rearrange -from functools import partial -import numpy as np from .lvdm_util import conv_nd, linear, avg_pool_nd, zero_module, normalization, timestep_embedding, nonlinearity from .lvdm_attention_temporal import STAttentionBlock @@ -13,7 +10,7 @@ from dataclasses import dataclass from ..configuration_utils import ConfigMixin, register_to_config -from ..utils import BaseOutput, apply_forward_hook +from ..utils import BaseOutput from .modeling_utils import ModelMixin diff --git a/ppdiffusers/ppdiffusers/models/lvdm_vae.py b/ppdiffusers/ppdiffusers/models/lvdm_vae.py index 0cac1b974eb185..5cd9c056ac3f9f 100644 --- a/ppdiffusers/ppdiffusers/models/lvdm_vae.py +++ b/ppdiffusers/ppdiffusers/models/lvdm_vae.py @@ -1,11 +1,9 @@ -import nntplib -import paddle from .lvdm_distributions import DiagonalGaussianDistribution from .lvdm_aemodules3d import SamePadConv3d, Encoder, Decoder from dataclasses import dataclass from ..configuration_utils import ConfigMixin, register_to_config -from ..utils import BaseOutput, apply_forward_hook +from ..utils import BaseOutput from .modeling_utils import ModelMixin from .vae import DecoderOutput diff --git a/ppdiffusers/ppdiffusers/pipelines/lvdm/__init__.py b/ppdiffusers/ppdiffusers/pipelines/lvdm/__init__.py index 0b67a5855ee5c0..7a5460ac3f75bf 100644 --- a/ppdiffusers/ppdiffusers/pipelines/lvdm/__init__.py +++ b/ppdiffusers/ppdiffusers/pipelines/lvdm/__init__.py @@ -14,7 +14,7 @@ # limitations under the License. from dataclasses import dataclass -from typing import List, Optional, Union +from typing import List, Union import numpy as np import paddle diff --git a/ppdiffusers/ppdiffusers/pipelines/lvdm/pipeline_latent_video_diffusion_model_text2video.py b/ppdiffusers/ppdiffusers/pipelines/lvdm/pipeline_latent_video_diffusion_model_text2video.py index dd52b6e084cf28..eacd1dbeeac51c 100644 --- a/ppdiffusers/ppdiffusers/pipelines/lvdm/pipeline_latent_video_diffusion_model_text2video.py +++ b/ppdiffusers/ppdiffusers/pipelines/lvdm/pipeline_latent_video_diffusion_model_text2video.py @@ -15,14 +15,12 @@ import inspect from typing import Any, Callable, Dict, List, Optional, Union import os -from tqdm import trange import numpy as np from einops import rearrange import paddle -from packaging import version -from paddlenlp.transformers import CLIPFeatureExtractor, CLIPTextModel, CLIPTokenizer +from paddlenlp.transformers import CLIPTextModel, CLIPTokenizer from ...configuration_utils import FrozenDict from ...models import LVDMAutoencoderKL, LVDMUNet3DModel diff --git a/ppdiffusers/ppdiffusers/pipelines/lvdm/video_save.py b/ppdiffusers/ppdiffusers/pipelines/lvdm/video_save.py index 084200a0ef5017..700be918509a71 100644 --- a/ppdiffusers/ppdiffusers/pipelines/lvdm/video_save.py +++ b/ppdiffusers/ppdiffusers/pipelines/lvdm/video_save.py @@ -8,8 +8,8 @@ import accimage except ImportError: accimage = None -from PIL import Image, ImageEnhance, ImageOps -from typing import Any, Dict, List, Literal, Optional, Sequence, Tuple, Union +from PIL import Image +from typing import Any, Dict, List, Optional, Tuple, Union import math try: import av @@ -53,8 +53,6 @@ def write_video(filename: str, audio_codec (str): the name of the audio codec, i.e. "mp3", "aac", etc. audio_options (Dict): dictionary containing options to be passed into the PyAV audio stream """ - # >>> if not torch.jit.is_scripting() and not torch.jit.is_tracing(): - # _log_api_usage_once(write_video) _check_av_available() video_array = paddle.to_tensor(data=video_array).astype('uint8').numpy() if isinstance(fps, float): @@ -131,8 +129,6 @@ def make_grid(tensor: Union[paddle.Tensor, List[paddle.Tensor]], Returns: grid (Tensor): the tensor containing grid of images. """ - # >>> if not torch.jit.is_scripting() and not torch.jit.is_tracing(): - # _log_api_usage_once(make_grid) if not paddle.is_tensor(x=tensor): if isinstance(tensor, list): for t in tensor: @@ -162,9 +158,6 @@ def make_grid(tensor: Union[paddle.Tensor, List[paddle.Tensor]], def norm_ip(img, low, high): img.clip_(min=low, max=high) - # """Class Method: *.sub_, not convert, please check whether it is torch.Tensor.*/Optimizer.*/nn.Module.*, and convert manually""" - # """Class Method: *.div_, not convert, please check whether it is torch.Tensor.*/Optimizer.*/nn.Module.*, and convert manually""" - # >>> img.sub_(low).div_(max(high - low, 1e-05)) img = img.substract(low).divide(max(high - low, 1e-05)) def norm_range(t, value_range): @@ -241,19 +234,11 @@ def to_tensor(pic) -> paddle.Tensor: Returns: Tensor: Converted image. """ - # >>> if not torch.jit.is_scripting() and not torch.jit.is_tracing(): - # _log_api_usage_once(to_tensor) - # if not (F_pil._is_pil_image(pic) or _is_numpy(pic)): - # raise TypeError(f'pic should be PIL Image or ndarray. Got {type(pic)}') - # if _is_numpy(pic) and not _is_numpy_image(pic): - # raise ValueError( - # f'pic should be 2/3 dimensional. Got {pic.ndim} dimensions.') default_float_dtype = paddle.get_default_dtype() if isinstance(pic, np.ndarray): if pic.ndim == 2: pic = pic[:, :, (None)] img = paddle.to_tensor(data=pic.transpose((2, 0, 1))) - # >>> if isinstance(img, torch.ByteTensor): if img.dtype == paddle.uint8: return paddle.divide( img.cast(default_float_dtype), @@ -261,20 +246,13 @@ def to_tensor(pic) -> paddle.Tensor: 255, dtype=paddle.float32)) else: return img - # if accimage is not None and isinstance(pic, accimage.Image): - # nppic = np.zeros([pic.channels, pic.height, pic.width], dtype=np. - # float32) - # pic.copyto(nppic) - # return paddle.to_tensor(data=nppic).cast(default_float_dtype) mode_to_nptype = {'I': np.int32, 'I;16': np.int16, 'F': np.float32} img = paddle.to_tensor(data=np.array( pic, mode_to_nptype.get(pic.mode, np.uint8), copy=True)) if pic.mode == '1': img = 255 * img - # img = img.reshape([pic.size[1], pic.size[0], F_pil.get_image_num_channels(pic)]) img = img.reshape([pic.size[1], pic.size[0], get_image_num_channels(pic)]) img = img.transpose(perm=(2, 0, 1)) - # >>> if isinstance(img, torch.ByteTensor): if img.dtype == paddle.uint8: return paddle.divide(img.cast(default_float_dtype), 255) else: @@ -341,7 +319,6 @@ def npz_to_video_grid(data_path, if nrow is None: nrow = int(np.ceil(np.sqrt(n))) if verbose: - # >>> frame_grids = [torchvision.utils.make_grid(fs, nrow=nrow) for fs in tqdm(frame_grids, desc='Making grids')] frame_grids = [ make_grid( fs, nrow=nrow) for fs in tqdm( @@ -349,7 +326,6 @@ def npz_to_video_grid(data_path, ] else: - # >>> frame_grids = [torchvision.utils.make_grid(fs, nrow=nrow) for fs in frame_grids] frame_grids = [make_grid(fs, nrow=nrow) for fs in frame_grids] if os.path.dirname(out_path) != '': @@ -365,8 +341,6 @@ def npz_to_video_grid(data_path, dtype = (paddle.stack(x=frame_grids) * 255).dtype frame_grids = (paddle.stack(x=frame_grids) * 255).transpose( perm=[0, 2, 3, 1]).cast(dtype) - # >>> torchvision.io.write_video(out_path, frame_grids, fps=fps, video_codec= - # 'h264', options={'crf': '10'}) write_video( out_path, frame_grids,