 import cv2
 from skimage import exposure
 from typing import Any, Dict, List, Optional
+from torch import autocast
+
 
 import modules.sd_hijack
 from modules import devices, prompt_parser, masking, sd_samplers, lowvram, generation_parameters_copypaste, script_callbacks, extra_networks, sd_vae_approx, scripts
@@ -186,7 +188,11 @@ def depth2img_image_conditioning(self, source_image):
         return conditioning
 
     def edit_image_conditioning(self, source_image):
-        conditioning_image = self.sd_model.get_first_stage_encoding(self.sd_model.encode_first_stage(source_image))
+        #source_image = 2 * torch.tensor(np.array(source_image)).float() / 255 - 1
+        #source_image = rearrange(source_image, "h w c -> 1 c h w").to(shared.device)
+        #source_image = rearrange(source_image, "h w c -> 1 c h w").to(shared.device)
+        #conditioning_image = self.sd_model.get_first_stage_encoding(self.sd_model.encode_first_stage(source_image))
+        conditioning_image = self.sd_model.encode_first_stage(source_image).mode()
 
         return conditioning_image
 
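The change above swaps a stochastic VAE encoding for a deterministic one, which is how instruct-pix2pix conditions on the source image. A minimal sketch of the difference, assuming a CompVis-style LatentDiffusion model whose encode_first_stage returns a DiagonalGaussianDistribution:

    # posterior over latents for the source image
    posterior = sd_model.encode_first_stage(source_image)
    z_sampled = sd_model.get_first_stage_encoding(posterior)  # scale_factor * posterior.sample(): noisy, scaled
    z_mode = posterior.mode()                                 # posterior mean: deterministic, unscaled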
@@ -450,11 +456,14 @@ def create_infotext(p, all_prompts, all_seeds, all_subseeds, comments=None, iter
         "Size": f"{p.width}x{p.height}",
         "Model hash": getattr(p, 'sd_model_hash', None if not opts.add_model_hash_to_info or not shared.sd_model.sd_model_hash else shared.sd_model.sd_model_hash),
         "Model": (None if not opts.add_model_name_to_info or not shared.sd_model.sd_checkpoint_info.model_name else shared.sd_model.sd_checkpoint_info.model_name.replace(',', '').replace(':', '')),
+        "Batch size": (None if p.batch_size < 2 else p.batch_size),
+        "Batch pos": (None if p.batch_size < 2 else position_in_batch),
         "Variation seed": (None if p.subseed_strength == 0 else all_subseeds[index]),
         "Variation seed strength": (None if p.subseed_strength == 0 else p.subseed_strength),
         "Seed resize from": (None if p.seed_resize_from_w == 0 or p.seed_resize_from_h == 0 else f"{p.seed_resize_from_w}x{p.seed_resize_from_h}"),
         "Denoising strength": getattr(p, 'denoising_strength', None),
         "Conditional mask weight": getattr(p, "inpainting_mask_weight", shared.opts.inpainting_mask_weight) if p.is_using_inpainting_conditioning else None,
+        "Eta": (None),
         "Clip skip": None if clip_skip <= 1 else clip_skip,
         "ENSD": None if opts.eta_noise_seed_delta == 0 else opts.eta_noise_seed_delta,
     }
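None-valued entries in this dict are dropped when the infotext string is assembled just below it, so "Batch size"/"Batch pos" only appear for batches of 2 or more, and hard-coding "Eta" to None removes Eta from the output entirely. A simplified sketch of that filtering (the real code also quotes values via generation_parameters_copypaste):

    generation_params_text = ", ".join(
        f"{k}: {v}" for k, v in generation_params.items() if v is not None)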
@@ -622,15 +631,17 @@ def get_conds_with_caching(function, required_prompts, steps, cache):
             if p.n_iter > 1:
                 shared.state.job = f"Batch {n+1} out of {p.n_iter}"
 
+            print(f"c = {c} and uc = {uc}")
             with devices.without_autocast() if devices.unet_needs_upcast else devices.autocast():
                 samples_ddim = p.sample(conditioning=c, unconditional_conditioning=uc, seeds=seeds, subseeds=subseeds, subseed_strength=p.subseed_strength, prompts=prompts)
 
             x_samples_ddim = [decode_first_stage(p.sd_model, samples_ddim[i:i+1].to(dtype=devices.dtype_vae))[0].cpu() for i in range(samples_ddim.size(0))]
-            for x in x_samples_ddim:
-                devices.test_for_nans(x, "vae")
+            # for x in x_samples_ddim:
+            #     devices.test_for_nans(x, "vae")
 
             x_samples_ddim = torch.stack(x_samples_ddim).float()
             x_samples_ddim = torch.clamp((x_samples_ddim + 1.0) / 2.0, min=0.0, max=1.0)
+            #x_samples_ddim = 255.0 * rearrange(x_samples_ddim, "1 c h w -> h w c")
 
             del samples_ddim
 
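For reference, the clamp line above is the standard remap of decoded samples from [-1, 1] to [0, 1] before conversion to 8-bit images:

    # -1 -> 0.0, 0 -> 0.5, 1 -> 1.0; the clamp guards against VAE overshoot
    x01 = torch.clamp((x + 1.0) / 2.0, min=0.0, max=1.0)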
@@ -645,7 +656,7 @@ def get_conds_with_caching(function, required_prompts, steps, cache):
             for i, x_sample in enumerate(x_samples_ddim):
                 x_sample = 255. * np.moveaxis(x_sample.cpu().numpy(), 0, 2)
                 x_sample = x_sample.astype(np.uint8)
-
+                #x_sample = 255.0 * rearrange(x_sample, "1 c h w -> h w c")
                 if p.restore_faces:
                     if opts.save and not p.do_not_save_samples and opts.save_images_before_face_restoration:
                         images.save_image(Image.fromarray(x_sample), p.outpath_samples, "", seeds[i], prompts[i], opts.samples_format, info=infotext(n, i), p=p, suffix="-before-face-restoration")
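The commented rearrange line duplicates what np.moveaxis already does two lines up; both convert channel-first CHW to channel-last HWC (and the commented pattern also assumes a leading batch axis the per-sample array no longer has). A quick check, assuming einops is installed:

    import numpy as np
    from einops import rearrange
    chw = np.zeros((3, 64, 64), dtype=np.float32)
    assert np.moveaxis(chw, 0, 2).shape == (64, 64, 3)
    assert rearrange(chw, "c h w -> h w c").shape == (64, 64, 3)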
@@ -868,8 +879,8 @@ def save_intermediate(image, index):
                 save_intermediate(image, i)
 
                 image = images.resize_image(0, image, target_width, target_height, upscaler_name=self.hr_upscaler)
-                image = np.array(image).astype(np.float32) / 255.0
-                image = np.moveaxis(image, 2, 0)
+                image = np.array(image).astype(np.float32) / 255.0 - 1
+                # image = np.moveaxis(image, 2, 0)
                 batch_images.append(image)
 
             decoded_samples = torch.from_numpy(np.array(batch_images))
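Note the value ranges here: x / 255.0 - 1 lands in [-1, 0], whereas the img2img path later in this diff uses 2 * x / 255 - 1 for the usual [-1, 1]. A quick check:

    import numpy as np
    x = np.array([0.0, 255.0], dtype=np.float32)
    print(x / 255.0 - 1)      # [-1.  0.]
    print(2 * x / 255.0 - 1)  # [-1.  1.]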
@@ -901,7 +912,7 @@ def save_intermediate(image, index):
 class StableDiffusionProcessingImg2Img(StableDiffusionProcessing):
     sampler = None
 
-    def __init__(self, init_images: list = None, resize_mode: int = 0, denoising_strength: float = 0.75, mask: Any = None, mask_blur: int = 4, inpainting_fill: int = 0, inpaint_full_res: bool = True, inpaint_full_res_padding: int = 0, inpainting_mask_invert: int = 0, initial_noise_multiplier: float = None, **kwargs):
+    def __init__(self, init_images: list = None, resize_mode: int = 0, denoising_strength: float = 0.75, mask: Any = None, mask_blur: int = 4, inpainting_fill: int = 0, inpaint_full_res: bool = True, inpaint_full_res_padding: int = 0, inpainting_mask_invert: int = 0, image_cfg_scale: float = 7.5, initial_noise_multiplier: float = None, **kwargs):
         super().__init__(**kwargs)
 
         self.init_images = init_images
@@ -916,6 +927,7 @@ def __init__(self, init_images: list = None, resize_mode: int = 0, denoising_str
         self.inpaint_full_res = inpaint_full_res
         self.inpaint_full_res_padding = inpaint_full_res_padding
         self.inpainting_mask_invert = inpainting_mask_invert
+        self.image_cfg_scale = image_cfg_scale
         self.initial_noise_multiplier = opts.initial_noise_multiplier if initial_noise_multiplier is None else initial_noise_multiplier
         self.mask = None
         self.nmask = None
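A hedged usage sketch for the new parameter; the name and default of 7.5 come straight from the diff, while the value 1.5 is what the instruct-pix2pix reference implementation suggests for image guidance:

    p = StableDiffusionProcessingImg2Img(
        init_images=[init_img],   # init_img: a PIL.Image (hypothetical variable)
        denoising_strength=0.75,
        image_cfg_scale=1.5,      # how strongly to stay close to the input image
    )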
@@ -983,9 +995,16 @@ def init(self, all_prompts, all_seeds, all_subseeds):
 
             if add_color_corrections:
                 self.color_corrections.append(setup_color_correction(image))
-
-            image = np.array(image).astype(np.float32) / 255.0
-            image = np.moveaxis(image, 2, 0)
+            width, height = image.size
+            factor = self.width / max(width, height)
+            factor = math.ceil(min(width, height) * factor / 64) * 64 / min(width, height)
+            width = int((width * factor) // 64) * 64
+            height = int((height * factor) // 64) * 64
+            image = ImageOps.fit(image, (width, height), method=Image.Resampling.LANCZOS)
+
+            #image = 2 * torch.tensor(np.array(image)).float() / 255 - 1
+            #image = np.array(image).astype(np.float32) / 255.0
+            #image = np.moveaxis(image, 2, 0)
 
             imgs.append(image)
 
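A worked example of the resize math above, with illustrative values (640x480 input, self.width = 512): factor = 512/640 = 0.8; the ceil step rounds the short side up to a multiple of 64, ceil(480 * 0.8 / 64) * 64 / 480 = 384/480 = 0.8; final size 512x384, both multiples of 64.

    import math
    w, h, target = 640, 480, 512
    factor = target / max(w, h)                                   # 0.8
    factor = math.ceil(min(w, h) * factor / 64) * 64 / min(w, h)  # 0.8 again in this case
    print(int((w * factor) // 64) * 64, int((h * factor) // 64) * 64)  # 512 384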
@@ -1002,10 +1021,22 @@ def init(self, all_prompts, all_seeds, all_subseeds):
             batch_images = np.array(imgs)
         else:
             raise RuntimeError(f"bad number of images passed: {len(imgs)}; expecting {self.batch_size} or less")
-
-        image = torch.from_numpy(batch_images)
-        image = 2. * image - 1.
-        image = image.to(shared.device)
+
+        #image = torch.from_numpy(batch_images)
+        #width, height = image.size
+        #factor = 512 / max(width, height)
+        ###factor = math.ceil(min(width, height) * factor / 64) * 64 / min(width, height)
+        #width = int((width * factor) // 64) * 64
+        #height = int((height * factor) // 64) * 64
+        #image = ImageOps.fit(image, (width, height), method=Image.Resampling.LANCZOS)
+        ##image = 2. * image - 1.
+        #image = rearrange(image, "h w c -> 1 c h w")
+        #image = image.to(shared.device)
+        #image = torch.from_numpy(batch_images)
+        #image = 2. * image - 1.
+        image = 2 * torch.tensor(np.array(image)).float() / 255 - 1
+        image = rearrange(image, "h w c -> 1 c h w").to(shared.device)
+        #image = image.to(shared.device)
 
         self.init_latent = self.sd_model.get_first_stage_encoding(self.sd_model.encode_first_stage(image))
 
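For reference, a minimal sketch of what the two live lines above do (assuming einops' rearrange, as used elsewhere in this diff): they take a single PIL image, normalize it to [-1, 1], and reshape it to the 1xCxHxW layout the VAE encoder expects.

    import numpy as np, torch
    from einops import rearrange
    # pil_img: an RGB PIL.Image (hypothetical variable)
    x = 2 * torch.tensor(np.array(pil_img)).float() / 255 - 1  # HxWxC in [-1, 1]
    x = rearrange(x, "h w c -> 1 c h w")                       # 1xCxHxW batch of one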
@@ -1032,6 +1063,7 @@ def init(self, all_prompts, all_seeds, all_subseeds):
         self.image_conditioning = self.img2img_image_conditioning(image, self.init_latent, image_mask)
 
     def sample(self, conditioning, unconditional_conditioning, seeds, subseeds, subseed_strength, prompts):
+
         x = create_random_tensors([opt_C, self.height // opt_f, self.width // opt_f], seeds=seeds, subseeds=subseeds, subseed_strength=self.subseed_strength, seed_resize_from_h=self.seed_resize_from_h, seed_resize_from_w=self.seed_resize_from_w, p=self)
 
         if self.initial_noise_multiplier != 1.0: