From c97bae16c7d87c3648fe27725b89b863f6d09a90 Mon Sep 17 00:00:00 2001 From: Jae-Won Chung Date: Thu, 19 Sep 2024 19:51:43 -0400 Subject: [PATCH] Updated diffusion benchmark and data --- .gitignore | 2 +- .../pegasus/A100/queue_1gpu.yaml | 8 +- .../pegasus/H100/queue_1gpu.yaml | 8 +- .../scripts/aggregate_leaderboard_models.py | 2 +- .../scripts/benchmark_one_datapoint.py | 41 ++++++---- .../scripts/benchmark_one_model.py | 76 ++++++++++--------- .../sharegpt4video/extract_first_frame.py | 2 +- .../pegasus/A100/queue_1gpu.yaml | 2 +- .../scripts/aggregate_leaderboard_models.py | 3 +- .../scripts/benchmark_one_datapoint.py | 57 ++++++++++++-- .../scripts/benchmark_one_model.py | 38 +++++----- .../pegasus/A100/queue_1gpu.yaml | 2 +- .../pegasus/H100/queue_1gpu.yaml | 2 +- .../scripts/aggregate_leaderboard_models.py | 3 +- .../scripts/benchmark_one_datapoint.py | 22 +++--- .../scripts/benchmark_one_model.py | 72 +++++++++--------- .../i2vgen-xl/bs1+steps25+frames16.json | 4 +- .../i2vgen-xl/bs2+steps25+frames16.json | 4 +- .../bs1+steps25+frames25.json | 4 +- .../bs2+steps25+frames25.json | 9 +++ .../bs3+steps25+frames25.json | 9 +++ .../bs1+steps25+frames14.json | 4 +- .../bs2+steps25+frames14.json | 9 +++ .../bs3+steps25+frames14.json | 9 +++ .../i2vgen-xl/bs1+steps25+frames16.json | 4 +- .../i2vgen-xl/bs2+steps25+frames16.json | 4 +- .../bs1+steps25+frames25.json | 4 +- .../bs2+steps25+frames25.json | 9 +++ .../bs3+steps25+frames25.json | 9 +++ .../bs1+steps25+frames14.json | 4 +- .../bs2+steps25+frames14.json | 9 +++ .../bs3+steps25+frames14.json | 9 +++ .../kandinsky-2-2-decoder/bs1+steps25.json | 4 +- .../kandinsky-2-2-decoder/bs16+steps25.json | 4 +- .../kandinsky-2-2-decoder/bs2+steps25.json | 4 +- .../kandinsky-2-2-decoder/bs32+steps25.json | 8 ++ .../kandinsky-2-2-decoder/bs4+steps25.json | 4 +- .../kandinsky-2-2-decoder/bs8+steps25.json | 4 +- .../kandinsky-3/bs1+steps25.json | 4 +- .../kandinsky-3/bs2+steps25.json | 4 +- .../openjourney-v4/bs1+steps25.json | 4 +- .../openjourney-v4/bs16+steps25.json | 4 +- .../openjourney-v4/bs2+steps25.json | 4 +- .../openjourney-v4/bs32+steps25.json | 8 ++ .../openjourney-v4/bs4+steps25.json | 4 +- .../openjourney-v4/bs64+steps25.json | 8 ++ .../openjourney-v4/bs8+steps25.json | 4 +- .../segmind/SSD-1B/bs1+steps25.json | 4 +- .../segmind/SSD-1B/bs2+steps25.json | 4 +- .../segmind/SSD-1B/bs4+steps25.json | 4 +- .../segmind/SSD-1B/bs8+steps25.json | 4 +- .../stabilityai/sdxl-turbo/bs1+steps25.json | 4 +- .../stabilityai/sdxl-turbo/bs16+steps25.json | 4 +- .../stabilityai/sdxl-turbo/bs2+steps25.json | 4 +- .../stabilityai/sdxl-turbo/bs4+steps25.json | 4 +- .../stabilityai/sdxl-turbo/bs8+steps25.json | 4 +- .../stable-diffusion-2-1/bs1+steps25.json | 4 +- .../stable-diffusion-2-1/bs16+steps25.json | 4 +- .../stable-diffusion-2-1/bs2+steps25.json | 4 +- .../stable-diffusion-2-1/bs32+steps25.json | 8 ++ .../stable-diffusion-2-1/bs4+steps25.json | 4 +- .../stable-diffusion-2-1/bs8+steps25.json | 4 +- .../bs1+steps25.json | 4 +- .../bs2+steps25.json | 4 +- .../bs4+steps25.json | 4 +- .../bs8+steps25.json | 4 +- .../bs1+steps25.json | 4 +- .../bs2+steps25.json | 4 +- .../bs4+steps25.json | 4 +- .../bs8+steps25.json | 4 +- .../kandinsky-2-2-decoder/bs1+steps25.json | 4 +- .../kandinsky-2-2-decoder/bs16+steps25.json | 4 +- .../kandinsky-2-2-decoder/bs2+steps25.json | 4 +- .../kandinsky-2-2-decoder/bs32+steps25.json | 4 +- .../kandinsky-2-2-decoder/bs4+steps25.json | 4 +- .../kandinsky-2-2-decoder/bs64+steps25.json | 4 +- .../kandinsky-2-2-decoder/bs8+steps25.json | 4 +- .../kandinsky-3/bs1+steps25.json | 4 +- .../kandinsky-3/bs2+steps25.json | 4 +- .../kandinsky-3/bs4+steps25.json | 4 +- .../kandinsky-3/bs8+steps25.json | 4 +- .../openjourney-v4/bs1+steps25.json | 4 +- .../openjourney-v4/bs16+steps25.json | 4 +- .../openjourney-v4/bs2+steps25.json | 4 +- .../openjourney-v4/bs32+steps25.json | 4 +- .../openjourney-v4/bs4+steps25.json | 4 +- .../openjourney-v4/bs64+steps25.json | 4 +- .../openjourney-v4/bs8+steps25.json | 4 +- .../segmind/SSD-1B/bs1+steps25.json | 4 +- .../segmind/SSD-1B/bs16+steps25.json | 4 +- .../segmind/SSD-1B/bs2+steps25.json | 4 +- .../segmind/SSD-1B/bs4+steps25.json | 4 +- .../segmind/SSD-1B/bs8+steps25.json | 4 +- .../stabilityai/sdxl-turbo/bs1+steps25.json | 4 +- .../stabilityai/sdxl-turbo/bs16+steps25.json | 4 +- .../stabilityai/sdxl-turbo/bs2+steps25.json | 4 +- .../stabilityai/sdxl-turbo/bs32+steps25.json | 4 +- .../stabilityai/sdxl-turbo/bs4+steps25.json | 4 +- .../stabilityai/sdxl-turbo/bs64+steps25.json | 4 +- .../stabilityai/sdxl-turbo/bs8+steps25.json | 4 +- .../stable-diffusion-2-1/bs1+steps25.json | 4 +- .../stable-diffusion-2-1/bs16+steps25.json | 4 +- .../stable-diffusion-2-1/bs2+steps25.json | 4 +- .../stable-diffusion-2-1/bs32+steps25.json | 4 +- .../stable-diffusion-2-1/bs4+steps25.json | 4 +- .../stable-diffusion-2-1/bs64+steps25.json | 4 +- .../stable-diffusion-2-1/bs8+steps25.json | 4 +- .../bs1+steps25.json | 4 +- .../bs16+steps25.json | 4 +- .../bs2+steps25.json | 4 +- .../bs32+steps25.json | 4 +- .../bs4+steps25.json | 4 +- .../bs8+steps25.json | 4 +- .../bs1+steps25.json | 4 +- .../bs16+steps25.json | 4 +- .../bs2+steps25.json | 4 +- .../bs4+steps25.json | 4 +- .../bs8+steps25.json | 4 +- .../bs1+steps25+frames16.json | 4 +- .../bs16+steps25+frames16.json | 9 +++ .../bs2+steps25+frames16.json | 4 +- .../bs4+steps25+frames16.json | 4 +- .../bs8+steps25+frames16.json | 4 +- .../bs1+steps25+frames16.json | 4 +- .../bs2+steps25+frames16.json | 4 +- .../bs4+steps25+frames16.json | 4 +- .../bs1+steps25+frames16.json | 4 +- .../bs16+steps25+frames16.json | 4 +- .../bs2+steps25+frames16.json | 4 +- .../bs4+steps25+frames16.json | 4 +- .../bs8+steps25+frames16.json | 4 +- .../bs1+steps25+frames16.json | 4 +- .../bs2+steps25+frames16.json | 4 +- .../bs4+steps25+frames16.json | 4 +- 134 files changed, 525 insertions(+), 348 deletions(-) create mode 100644 data/diffusion/image-to-video/A100-SXM4-40GB/stabilityai/stable-video-diffusion-img2vid-xt/bs2+steps25+frames25.json create mode 100644 data/diffusion/image-to-video/A100-SXM4-40GB/stabilityai/stable-video-diffusion-img2vid-xt/bs3+steps25+frames25.json create mode 100644 data/diffusion/image-to-video/A100-SXM4-40GB/stabilityai/stable-video-diffusion-img2vid/bs2+steps25+frames14.json create mode 100644 data/diffusion/image-to-video/A100-SXM4-40GB/stabilityai/stable-video-diffusion-img2vid/bs3+steps25+frames14.json create mode 100644 data/diffusion/image-to-video/H100 80GB HBM3/stabilityai/stable-video-diffusion-img2vid-xt/bs2+steps25+frames25.json create mode 100644 data/diffusion/image-to-video/H100 80GB HBM3/stabilityai/stable-video-diffusion-img2vid-xt/bs3+steps25+frames25.json create mode 100644 data/diffusion/image-to-video/H100 80GB HBM3/stabilityai/stable-video-diffusion-img2vid/bs2+steps25+frames14.json create mode 100644 data/diffusion/image-to-video/H100 80GB HBM3/stabilityai/stable-video-diffusion-img2vid/bs3+steps25+frames14.json create mode 100644 data/diffusion/text-to-image/A100-SXM4-40GB/kandinsky-community/kandinsky-2-2-decoder/bs32+steps25.json create mode 100644 data/diffusion/text-to-image/A100-SXM4-40GB/prompthero/openjourney-v4/bs32+steps25.json create mode 100644 data/diffusion/text-to-image/A100-SXM4-40GB/prompthero/openjourney-v4/bs64+steps25.json create mode 100644 data/diffusion/text-to-image/A100-SXM4-40GB/stabilityai/stable-diffusion-2-1/bs32+steps25.json create mode 100644 data/diffusion/text-to-video/A100-SXM4-40GB/ali-vilab/text-to-video-ms-1.7b/bs16+steps25+frames16.json diff --git a/.gitignore b/.gitignore index f9a2da9..5b7b7d3 100644 --- a/.gitignore +++ b/.gitignore @@ -18,4 +18,4 @@ build/ # Data files *.log -pegasus/consumed.yaml +figures/ diff --git a/benchmark/diffusion/image-to-video/pegasus/A100/queue_1gpu.yaml b/benchmark/diffusion/image-to-video/pegasus/A100/queue_1gpu.yaml index 12c945c..fe18d4d 100644 --- a/benchmark/diffusion/image-to-video/pegasus/A100/queue_1gpu.yaml +++ b/benchmark/diffusion/image-to-video/pegasus/A100/queue_1gpu.yaml @@ -1,6 +1,6 @@ - command: - - "python scripts/benchmark_one_model.py {{ model }} --result-root results/joule --dataset-path sharegpt4video/sharegpt4video_100.json --gpu-ids {{ gpu }} --batch-sizes 8 4 2 1 --power-limits 400 --num-inference-steps 25" + - "python scripts/benchmark_one_model.py {{ model }} --result-root results/joule --dataset-path sharegpt4video/sharegpt4video_100.json --gpu-ids {{ gpu }} --batch-sizes 4 3 2 1 --power-limits 400 --num-inference-steps 1 2 4 8 16 25 30 40 50" model: - - '--model ali-vilab/i2vgen-xl --num-frames 16 --add-text-prompt' - - '--model stabilityai/stable-video-diffusion-img2vid --num-frames 14' - - '--model stabilityai/stable-video-diffusion-img2vid-xt --num-frames 25' + - '--model ali-vilab/i2vgen-xl --num-frames 16 --add-text-prompt --width 1280 --height 720' + - '--model stabilityai/stable-video-diffusion-img2vid --num-frames 14 --width 1024 --height 576' + - '--model stabilityai/stable-video-diffusion-img2vid-xt --num-frames 25 --width 1024 --height 576' diff --git a/benchmark/diffusion/image-to-video/pegasus/H100/queue_1gpu.yaml b/benchmark/diffusion/image-to-video/pegasus/H100/queue_1gpu.yaml index 3602b7c..724a9ed 100644 --- a/benchmark/diffusion/image-to-video/pegasus/H100/queue_1gpu.yaml +++ b/benchmark/diffusion/image-to-video/pegasus/H100/queue_1gpu.yaml @@ -1,6 +1,6 @@ - command: - - "python scripts/benchmark_one_model.py {{ model }} --result-root results/joule --dataset-path sharegpt4video/sharegpt4video_700.json --gpu-ids {{ gpu }} --batch-sizes 64 32 16 8 4 2 1 --power-limits 700 --num-inference-steps 25" + - "python scripts/benchmark_one_model.py {{ model }} --result-root results/joule --dataset-path sharegpt4video/sharegpt4video_100.json --gpu-ids {{ gpu }} --batch-sizes 4 3 2 1 --power-limits 700 --num-inference-steps 1 2 4 8 16 25 30 40 50" model: - - '--model ali-vilab/i2vgen-xl --num-frames 16 --add-text-prompt' - - '--model stabilityai/stable-video-diffusion-img2vid --num-frames 14' - - '--model stabilityai/stable-video-diffusion-img2vid-xt --num-frames 25' + - "--model ali-vilab/i2vgen-xl --num-frames 16 --add-text-prompt --width 1280 --height 720" + - "--model stabilityai/stable-video-diffusion-img2vid --num-frames 14 --width 1024 --height 576" + - "--model stabilityai/stable-video-diffusion-img2vid-xt --num-frames 25 --width 1024 --height 576" diff --git a/benchmark/diffusion/image-to-video/scripts/aggregate_leaderboard_models.py b/benchmark/diffusion/image-to-video/scripts/aggregate_leaderboard_models.py index 6f894e8..066fa49 100644 --- a/benchmark/diffusion/image-to-video/scripts/aggregate_leaderboard_models.py +++ b/benchmark/diffusion/image-to-video/scripts/aggregate_leaderboard_models.py @@ -15,7 +15,7 @@ def main(results_dir: Path, output_file: Path) -> None: for model_dir in sorted(glob(f"{results_dir}/*/*")): model_name = "/".join(model_dir.split("/")[-2:]) print(f" {model_name}") - result_file_cand = glob(f"{model_dir}/bs1+*+results.json") + result_file_cand = glob(f"{model_dir}/bs1+*+steps25+results.json") assert len(result_file_cand) == 1, model_name results_data = json.load(open(result_file_cand[0])) denosing_module_name = "unet" if "unet" in results_data["num_parameters"] else "transformer" diff --git a/benchmark/diffusion/image-to-video/scripts/benchmark_one_datapoint.py b/benchmark/diffusion/image-to-video/scripts/benchmark_one_datapoint.py index 653b925..b514426 100644 --- a/benchmark/diffusion/image-to-video/scripts/benchmark_one_datapoint.py +++ b/benchmark/diffusion/image-to-video/scripts/benchmark_one_datapoint.py @@ -27,10 +27,10 @@ class Results: model: str num_parameters: dict[str, int] gpu_model: str - num_infernece_steps: int - num_frames: int power_limit: int batch_size: int + num_inference_steps: int + num_frames: int num_prompts: int total_runtime: float = 0.0 total_energy: float = 0.0 @@ -80,6 +80,7 @@ def load_text_image_prompts( path: str, batch_size: int, num_batches: int | None = None, + image_resize: tuple[int, int] | None = None, ) -> tuple[int, list[tuple[list[str], list[Image.Image]]]]: """Load the dataset to feed the model and return it as a list of batches of prompts. @@ -93,6 +94,9 @@ def load_text_image_prompts( dataset = json.load(open(path)) assert len(dataset["caption"]) == len(dataset["video_id"]) + dataset["caption"] *= 10 + dataset["video_id"] *= 10 + if num_batches is not None: if len(dataset["caption"]) < num_batches * batch_size: raise ValueError("Not enough data for the requested number of batches.") @@ -103,6 +107,8 @@ def load_text_image_prompts( dataset["first_frame"] = [ load_image(str(image_path / f"{video_id}.jpg")) for video_id in dataset["video_id"] ] + if image_resize is not None: + dataset["first_frame"] = [image.resize(image_resize) for image in dataset["first_frame"]] batched = [ (dataset["caption"][i : i + batch_size], dataset["first_frame"][i : i + batch_size]) @@ -135,8 +141,8 @@ def benchmark(args: argparse.Namespace) -> None: results_dir = Path(args.result_root) / args.model results_dir.mkdir(parents=True, exist_ok=True) - benchmark_name = str(results_dir / f"bs{args.batch_size}+pl{args.power_limit}") - video_dir = results_dir / f"bs{args.batch_size}+pl{args.power_limit}+generated" + benchmark_name = str(results_dir / f"bs{args.batch_size}+pl{args.power_limit}+steps{args.num_inference_steps}") + video_dir = results_dir / f"bs{args.batch_size}+pl{args.power_limit}+steps{args.num_inference_steps}+generated" video_dir.mkdir(exist_ok=True) arg_out_filename = f"{benchmark_name}+args.json" @@ -150,11 +156,16 @@ def benchmark(args: argparse.Namespace) -> None: pynvml.nvmlInit() handle = pynvml.nvmlDeviceGetHandleByIndex(0) gpu_model = pynvml.nvmlDeviceGetName(handle) - pynvml.nvmlDeviceSetPersistenceMode(handle, pynvml.NVML_FEATURE_ENABLED) - pynvml.nvmlDeviceSetPowerManagementLimit(handle, args.power_limit * 1000) + # pynvml.nvmlDeviceSetPersistenceMode(handle, pynvml.NVML_FEATURE_ENABLED) + # pynvml.nvmlDeviceSetPowerManagementLimit(handle, args.power_limit * 1000) pynvml.nvmlShutdown() - num_prompts, batched_prompts = load_text_image_prompts(args.dataset_path, args.batch_size, args.num_batches) + num_prompts, batched_prompts = load_text_image_prompts( + args.dataset_path, + args.batch_size, + args.num_batches, + (args.width, args.height), + ) pipeline = get_pipeline(args.model) @@ -189,7 +200,7 @@ def benchmark(args: argparse.Namespace) -> None: fps_param_name = fps_param_name_candidates[0] torch.cuda.reset_peak_memory_stats(device="cuda:0") - zeus_monitor.begin_window("benchmark", sync_cuda=False) + zeus_monitor.begin_window("benchmark", sync_execution=False) # Build common parameter dict for all batches params: dict[str, Any] = dict( @@ -210,15 +221,15 @@ def benchmark(args: argparse.Namespace) -> None: if args.add_text_prompt: params["prompt"] = intermediate.prompts - zeus_monitor.begin_window("batch", sync_cuda=False) + zeus_monitor.begin_window("batch", sync_execution=False) frames = pipeline(**params).frames - batch_measurements = zeus_monitor.end_window("batch", sync_cuda=False) + batch_measurements = zeus_monitor.end_window("batch", sync_execution=False) intermediate.frames = frames intermediate.batch_latency = batch_measurements.time intermediate.batch_energy = batch_measurements.total_energy - measurements = zeus_monitor.end_window("benchmark", sync_cuda=False) + measurements = zeus_monitor.end_window("benchmark", sync_execution=False) peak_memory = torch.cuda.max_memory_allocated(device="cuda:0") results: list[Result] = [] @@ -255,10 +266,10 @@ def benchmark(args: argparse.Namespace) -> None: model=args.model, num_parameters=count_parameters(pipeline), gpu_model=gpu_model, - num_infernece_steps=args.num_inference_steps, - num_frames=args.num_frames, power_limit=args.power_limit, batch_size=args.batch_size, + num_inference_steps=args.num_inference_steps, + num_frames=args.num_frames, num_prompts=num_prompts, total_runtime=measurements.time, total_energy=measurements.total_energy, @@ -289,8 +300,8 @@ def benchmark(args: argparse.Namespace) -> None: parser.add_argument("--num-inference-steps", type=int, default=50, help="The number of denoising steps.") parser.add_argument("--num-frames", type=int, default=1, help="The number of frames to generate.") parser.add_argument("--fps", type=int, default=16, help="Frames per second for micro-conditioning.") - parser.add_argument("--height", type=int, help="Height of the generated video.") - parser.add_argument("--width", type=int, help="Width of the generated video.") + parser.add_argument("--height", type=int, required=True, help="Height of the generated video.") + parser.add_argument("--width", type=int, required=True, help="Width of the generated video.") parser.add_argument("--num-batches", type=int, default=None, help="The number of batches to use from the dataset.") parser.add_argument("--save-every", type=int, default=10, help="Save generations to file every N prompts.") parser.add_argument("--seed", type=int, default=0, help="The seed to use for the RNG.") diff --git a/benchmark/diffusion/image-to-video/scripts/benchmark_one_model.py b/benchmark/diffusion/image-to-video/scripts/benchmark_one_model.py index 4fee82f..0bf3aeb 100644 --- a/benchmark/diffusion/image-to-video/scripts/benchmark_one_model.py +++ b/benchmark/diffusion/image-to-video/scripts/benchmark_one_model.py @@ -28,44 +28,48 @@ def main(args: argparse.Namespace) -> None: print_and_write(outfile, f"Benchmarking {args.model}\n") print_and_write(outfile, f"Batch sizes: {args.batch_sizes}\n") print_and_write(outfile, f"Power limits: {args.power_limits}\n") + print_and_write(outfile, f"Number of inference steps: {args.num_inference_steps}\n") for batch_size in args.batch_sizes: for power_limit in args.power_limits: - print_and_write(outfile, f"{batch_size=}, {power_limit=}\n", flush=True) - with subprocess.Popen( - args=[ - "docker", "run", - "--gpus", '"device=' + ','.join(args.gpu_ids) + '"', - "--cap-add", "SYS_ADMIN", - "--name", f"leaderboard-i2v-{''.join(args.gpu_ids)}", - "--rm", - "-v", "/data/leaderboard/hfcache:/root/.cache/huggingface", - "-v", f"{os.getcwd()}:/workspace/image-to-video", - "mlenergy/leaderboard:diffusion-i2v", - "--dataset-path", args.dataset_path, - "--result-root", args.result_root, - "--batch-size", batch_size, - "--num-batches", "10", - "--power-limit", power_limit, - "--model", args.model, - "--huggingface-token", hf_token, - "--num-frames", args.num_frames, - "--num-inference-steps", args.num_inference_steps, - ] + (["--add-text-prompt"] if args.add_text_prompt else []), - stdout=subprocess.PIPE, - stderr=subprocess.STDOUT, - text=True, - ) as proc: - if proc.stdout: - i = 0 - for line in proc.stdout: - print_and_write(outfile, line, flush=i % 50 == 0) - i += 1 + for num_inference_steps in args.num_inference_steps: + print_and_write(outfile, f"{batch_size=}, {power_limit=}, {num_inference_steps=}\n", flush=True) + with subprocess.Popen( + args=[ + "docker", "run", + "--gpus", '"device=' + ','.join(args.gpu_ids) + '"', + "--cap-add", "SYS_ADMIN", + "--name", f"leaderboard-i2v-{''.join(args.gpu_ids)}", + "--rm", + "-v", "/data/leaderboard/hfcache:/root/.cache/huggingface", + "-v", f"{os.getcwd()}:/workspace/image-to-video", + "mlenergy/leaderboard:diffusion-i2v", + "--dataset-path", args.dataset_path, + "--result-root", args.result_root, + "--batch-size", batch_size, + "--num-batches", "8", + "--power-limit", power_limit, + "--model", args.model, + "--huggingface-token", hf_token, + "--num-frames", args.num_frames, + "--num-inference-steps", num_inference_steps, + "--width", str(args.width), + "--height", str(args.height), + ] + (["--add-text-prompt"] if args.add_text_prompt else []), + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + text=True, + ) as proc: + if proc.stdout: + i = 0 + for line in proc.stdout: + print_and_write(outfile, line, flush=i % 50 == 0) + i += 1 - # If proc exited with non-zero status, it's probably an OOM. - # Move on to the next batch size. - if proc.returncode != 0: - break + # If proc exited with non-zero status, it's probably an OOM. + # Move on to the next batch size. + if proc.returncode != 0: + break @@ -77,8 +81,10 @@ def main(args: argparse.Namespace) -> None: parser.add_argument("--batch-sizes", type=str, nargs="+", default=["8", "4", "2", "1"], help="Batch sizes to benchmark") parser.add_argument("--power-limits", type=str, nargs="+", default=["400", "300", "200"], help="Power limits to benchmark") parser.add_argument("--num-frames", type=str, help="Number of frames to generate") - parser.add_argument("--num-inference-steps", type=str, help="Number of denoising steps") + parser.add_argument("--num-inference-steps", type=str, nargs="+", default=["1", "2", "4", "8", "16", "30", "40", "50"], help="Number of inference steps to run") parser.add_argument("--add-text-prompt", action="store_true", help="Input text prompt alongside image.") + parser.add_argument("--height", type=int, required=True, help="Height of the generated video.") + parser.add_argument("--width", type=int, required=True, help="Width of the generated video.") parser.add_argument("--dataset-path", type=str, help="Path to the dataset JSON file.") args = parser.parse_args() main(args) diff --git a/benchmark/diffusion/image-to-video/sharegpt4video/extract_first_frame.py b/benchmark/diffusion/image-to-video/sharegpt4video/extract_first_frame.py index 6647653..ed808b2 100644 --- a/benchmark/diffusion/image-to-video/sharegpt4video/extract_first_frame.py +++ b/benchmark/diffusion/image-to-video/sharegpt4video/extract_first_frame.py @@ -3,7 +3,7 @@ import cv2 -DATASET_PATH = "sharegpt4video_700.json" +DATASET_PATH = "sharegpt4video_100.json" def main() -> None: diff --git a/benchmark/diffusion/text-to-image/pegasus/A100/queue_1gpu.yaml b/benchmark/diffusion/text-to-image/pegasus/A100/queue_1gpu.yaml index 7eae3fc..247f982 100644 --- a/benchmark/diffusion/text-to-image/pegasus/A100/queue_1gpu.yaml +++ b/benchmark/diffusion/text-to-image/pegasus/A100/queue_1gpu.yaml @@ -1,5 +1,5 @@ - command: - - "python scripts/benchmark_one_model.py --model {{ model }} --result-root results/joule --gpu-ids {{ gpu }} --batch-sizes 16 8 4 2 1 --power-limits 400" + - "python scripts/benchmark_one_model.py --model {{ model }} --result-root results/joule --gpu-ids {{ gpu }} --batch-sizes 64 32 16 8 4 2 1 --num-inference-steps 1 2 4 8 16 25 30 40 50 --power-limits 400" model: - stabilityai/stable-diffusion-2-1 - stabilityai/stable-diffusion-xl-base-1.0 diff --git a/benchmark/diffusion/text-to-image/scripts/aggregate_leaderboard_models.py b/benchmark/diffusion/text-to-image/scripts/aggregate_leaderboard_models.py index fb4865e..066fa49 100644 --- a/benchmark/diffusion/text-to-image/scripts/aggregate_leaderboard_models.py +++ b/benchmark/diffusion/text-to-image/scripts/aggregate_leaderboard_models.py @@ -15,7 +15,7 @@ def main(results_dir: Path, output_file: Path) -> None: for model_dir in sorted(glob(f"{results_dir}/*/*")): model_name = "/".join(model_dir.split("/")[-2:]) print(f" {model_name}") - result_file_cand = glob(f"{model_dir}/bs1+*+results.json") + result_file_cand = glob(f"{model_dir}/bs1+*+steps25+results.json") assert len(result_file_cand) == 1, model_name results_data = json.load(open(result_file_cand[0])) denosing_module_name = "unet" if "unet" in results_data["num_parameters"] else "transformer" @@ -24,6 +24,7 @@ def main(results_dir: Path, output_file: Path) -> None: nickname=model_name.split("/")[-1].replace("-", " ").title(), total_params=raw_params_to_readable(sum(results_data["num_parameters"].values())), denoising_params=raw_params_to_readable(results_data["num_parameters"][denosing_module_name]), + resolution="NA", ) assert model_name not in models models[model_name] = model_info diff --git a/benchmark/diffusion/text-to-image/scripts/benchmark_one_datapoint.py b/benchmark/diffusion/text-to-image/scripts/benchmark_one_datapoint.py index f42d60c..b719d6b 100644 --- a/benchmark/diffusion/text-to-image/scripts/benchmark_one_datapoint.py +++ b/benchmark/diffusion/text-to-image/scripts/benchmark_one_datapoint.py @@ -1,8 +1,10 @@ from __future__ import annotations import os +import time import json import argparse +import multiprocessing as mp from pprint import pprint from pathlib import Path from contextlib import suppress @@ -11,6 +13,7 @@ import torch import pynvml import numpy as np +import pandas as pd from PIL import Image from datasets import load_dataset, Dataset from transformers.trainer_utils import set_seed @@ -35,9 +38,9 @@ class Results: model: str num_parameters: dict[str, int] gpu_model: str - num_inference_steps: int power_limit: int batch_size: int + num_inference_steps: int num_prompts: int average_clip_score: float = 0.0 total_runtime: float = 0.0 @@ -118,6 +121,28 @@ def load_partiprompts( return len(batched) * batch_size, batched +def power_monitor(csv_path: str, gpu_indices: list[int], chan: mp.SimpleQueue) -> None: + pynvml.nvmlInit() + handles = [pynvml.nvmlDeviceGetHandleByIndex(i) for i in gpu_indices] + + fields = [ + (pynvml.NVML_FI_DEV_POWER_AVERAGE, pynvml.NVML_POWER_SCOPE_GPU), + (pynvml.NVML_FI_DEV_POWER_AVERAGE, pynvml.NVML_POWER_SCOPE_MEMORY), + ] + + columns = ["timestamp"] + sum([[f"gpu{i}", f"vram{i}"] for i in gpu_indices], []) + power: list[list] = [] + while chan.empty(): + row = [time.monotonic()] + values = [pynvml.nvmlDeviceGetFieldValues(h, fields) for h in handles] + for value in values: + row.extend((value[0].value.uiVal, value[1].value.uiVal)) + power.append(row) + time.sleep(max(0.0, 0.1 - (time.monotonic() - row[0]))) + + pd.DataFrame(power, columns=columns).to_csv(csv_path, index=False) + + def calculate_clip_score( model: CLIPModel, processor: CLIPProcessor, @@ -183,8 +208,8 @@ def benchmark(args: argparse.Namespace) -> None: results_dir = Path(args.result_root) / args.model results_dir.mkdir(parents=True, exist_ok=True) - benchmark_name = str(results_dir / f"bs{args.batch_size}+pl{args.power_limit}") - image_dir = results_dir / f"bs{args.batch_size}+pl{args.power_limit}+generated" + benchmark_name = str(results_dir / f"bs{args.batch_size}+pl{args.power_limit}+steps{args.num_inference_steps}") + image_dir = results_dir / f"bs{args.batch_size}+pl{args.power_limit}+steps{args.num_inference_steps}+generated" image_dir.mkdir(exist_ok=True) arg_out_filename = f"{benchmark_name}+args.json" @@ -222,27 +247,42 @@ def benchmark(args: argparse.Namespace) -> None: ResultIntermediateBatched(prompts=batch) for batch in batched_prompts ] + pmon = None + pmon_chan = None + if args.monitor_power: + pmon_chan = mp.SimpleQueue() + pmon = mp.get_context("spawn").Process( + target=power_monitor, + args=(f"{benchmark_name}+power.csv", [g.gpu_index for g in zeus_monitor.gpus.gpus], pmon_chan), + ) + pmon.start() + torch.cuda.reset_peak_memory_stats(device="cuda:0") - zeus_monitor.begin_window("benchmark", sync_cuda=False) + zeus_monitor.begin_window("benchmark", sync_execution=False) for ind, intermediate in enumerate(intermediates): print(f"Batch {ind + 1}/{len(intermediates)}") - zeus_monitor.begin_window("batch", sync_cuda=False) + zeus_monitor.begin_window("batch", sync_execution=False) images = pipeline( intermediate.prompts, generator=rng, num_inference_steps=args.num_inference_steps, output_type="np", ).images - batch_measurements = zeus_monitor.end_window("batch", sync_cuda=False) + batch_measurements = zeus_monitor.end_window("batch", sync_execution=False) intermediate.images = images intermediate.batch_latency = batch_measurements.time intermediate.batch_energy = batch_measurements.total_energy - measurements = zeus_monitor.end_window("benchmark", sync_cuda=False) + measurements = zeus_monitor.end_window("benchmark", sync_execution=False) peak_memory = torch.cuda.max_memory_allocated(device="cuda:0") + if pmon is not None and pmon_chan is not None: + pmon_chan.put("stop") + pmon.join(timeout=5.0) + pmon.terminate() + # Scale images to [0, 256] and convert to uint8 for intermediate in intermediates: intermediate.images = (intermediate.images * 255).astype("uint8") @@ -292,9 +332,9 @@ def benchmark(args: argparse.Namespace) -> None: model=args.model, num_parameters=count_parameters(pipeline), gpu_model=gpu_model, - num_inference_steps=args.num_inference_steps, power_limit=args.power_limit, batch_size=args.batch_size, + num_inference_steps=args.num_inference_steps, num_prompts=num_prompts, average_clip_score=sum(r.clip_score for r in results) / len(results), total_runtime=measurements.time, @@ -326,6 +366,7 @@ def benchmark(args: argparse.Namespace) -> None: parser.add_argument("--image-save-every", type=int, default=10, help="Save images to file every N prompts.") parser.add_argument("--seed", type=int, default=0, help="The seed to use for the RNG.") parser.add_argument("--huggingface-token", type=str, help="The HuggingFace token to use.") + parser.add_argument("--monitor-power", default=False, action="store_true", help="Whether to monitor power over time.") args = parser.parse_args() benchmark(args) diff --git a/benchmark/diffusion/text-to-image/scripts/benchmark_one_model.py b/benchmark/diffusion/text-to-image/scripts/benchmark_one_model.py index 26deb32..a71e245 100644 --- a/benchmark/diffusion/text-to-image/scripts/benchmark_one_model.py +++ b/benchmark/diffusion/text-to-image/scripts/benchmark_one_model.py @@ -28,12 +28,13 @@ def main(args: argparse.Namespace) -> None: print_and_write(outfile, f"Benchmarking {args.model}\n") print_and_write(outfile, f"Batch sizes: {args.batch_sizes}\n") print_and_write(outfile, f"Power limits: {args.power_limits}\n") + print_and_write(outfile, f"Number of inference steps: {args.num_inference_steps}\n") for batch_size in args.batch_sizes: for power_limit in args.power_limits: - print_and_write(outfile, f"{batch_size=}, {power_limit=}\n", flush=True) - with subprocess.Popen( - args=[ + for num_inference_steps in args.num_inference_steps: + print_and_write(outfile, f"{batch_size=}, {power_limit=}, {num_inference_steps=}\n", flush=True) + cmd=[ "docker", "run", "--gpus", '"device=' + ','.join(args.gpu_ids) + '"', "--cap-add", "SYS_ADMIN", @@ -48,22 +49,21 @@ def main(args: argparse.Namespace) -> None: "--power-limit", power_limit, "--model", args.model, "--huggingface-token", hf_token, - "--num-inference-steps", "25", - ], - stdout=subprocess.PIPE, - stderr=subprocess.STDOUT, - text=True, - ) as proc: - if proc.stdout: - i = 0 - for line in proc.stdout: - print_and_write(outfile, line, flush=i % 50 == 0) - i += 1 + "--num-inference-steps", num_inference_steps, + ] + if args.monitor_power: + cmd.append("--monitor-power") + with subprocess.Popen(args=cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True) as proc: + if proc.stdout: + i = 0 + for line in proc.stdout: + print_and_write(outfile, line, flush=i % 50 == 0) + i += 1 - # If proc exited with non-zero status, it's probably an OOM. - # Move on to the next batch size. - if proc.returncode != 0: - break + # If proc exited with non-zero status, it's probably an OOM. + # Move on to the next batch size. + if proc.returncode != 0: + break @@ -74,5 +74,7 @@ def main(args: argparse.Namespace) -> None: parser.add_argument("--gpu-ids", type=str, nargs="+", help="GPU IDs to use") parser.add_argument("--batch-sizes", type=str, nargs="+", default=["8", "4", "2", "1"], help="Batch sizes to benchmark") parser.add_argument("--power-limits", type=str, nargs="+", default=["400", "300", "200"], help="Power limits to benchmark") + parser.add_argument("--num-inference-steps", type=str, nargs="+", default=["1", "2", "4", "8", "16", "25", "30", "40", "50"], help="Number of inference steps to run") + parser.add_argument("--monitor-power", default=False, action="store_true", help="Whether to monitor power over time.") args = parser.parse_args() main(args) diff --git a/benchmark/diffusion/text-to-video/pegasus/A100/queue_1gpu.yaml b/benchmark/diffusion/text-to-video/pegasus/A100/queue_1gpu.yaml index 32921e5..182e9d0 100644 --- a/benchmark/diffusion/text-to-video/pegasus/A100/queue_1gpu.yaml +++ b/benchmark/diffusion/text-to-video/pegasus/A100/queue_1gpu.yaml @@ -1,5 +1,5 @@ - command: - - "python scripts/benchmark_one_model.py --model {{ model }} --result-root results/joule --dataset-path sharegpt4video/sharegpt4video_100.json --gpu-ids {{ gpu }} --batch-sizes 16 8 4 2 1 --power-limits 400 --num-inference-steps 25 --num-frames 16" + - "python scripts/benchmark_one_model.py --model {{ model }} --result-root results/joule --dataset-path sharegpt4video/sharegpt4video_100.json --gpu-ids {{ gpu }} --batch-sizes 32 16 8 4 2 1 --power-limits 400 --num-inference-steps 1 2 4 8 16 25 30 40 50 --num-frames 16" model: - ali-vilab/text-to-video-ms-1.7b - guoyww/animatediff-motion-adapter-v1-5-3 diff --git a/benchmark/diffusion/text-to-video/pegasus/H100/queue_1gpu.yaml b/benchmark/diffusion/text-to-video/pegasus/H100/queue_1gpu.yaml index 318690c..a7f7520 100644 --- a/benchmark/diffusion/text-to-video/pegasus/H100/queue_1gpu.yaml +++ b/benchmark/diffusion/text-to-video/pegasus/H100/queue_1gpu.yaml @@ -1,5 +1,5 @@ - command: - - "python scripts/benchmark_one_model.py --model {{ model }} --result-root results/joule --dataset-path sharegpt4video/sharegpt4video_700.json --gpu-ids {{ gpu }} --batch-sizes 64 32 16 8 4 2 1 --power-limits 700 --num-inference-steps 25 --num-frames 16" + - "python scripts/benchmark_one_model.py --model {{ model }} --result-root results/joule --dataset-path sharegpt4video/sharegpt4video_100.json --gpu-ids {{ gpu }} --batch-sizes 32 16 8 4 2 1 --power-limits 700 --num-inference-steps 1 2 4 8 16 25 30 40 50 --num-frames 16" model: - ali-vilab/text-to-video-ms-1.7b - guoyww/animatediff-motion-adapter-v1-5-3 diff --git a/benchmark/diffusion/text-to-video/scripts/aggregate_leaderboard_models.py b/benchmark/diffusion/text-to-video/scripts/aggregate_leaderboard_models.py index fb4865e..066fa49 100644 --- a/benchmark/diffusion/text-to-video/scripts/aggregate_leaderboard_models.py +++ b/benchmark/diffusion/text-to-video/scripts/aggregate_leaderboard_models.py @@ -15,7 +15,7 @@ def main(results_dir: Path, output_file: Path) -> None: for model_dir in sorted(glob(f"{results_dir}/*/*")): model_name = "/".join(model_dir.split("/")[-2:]) print(f" {model_name}") - result_file_cand = glob(f"{model_dir}/bs1+*+results.json") + result_file_cand = glob(f"{model_dir}/bs1+*+steps25+results.json") assert len(result_file_cand) == 1, model_name results_data = json.load(open(result_file_cand[0])) denosing_module_name = "unet" if "unet" in results_data["num_parameters"] else "transformer" @@ -24,6 +24,7 @@ def main(results_dir: Path, output_file: Path) -> None: nickname=model_name.split("/")[-1].replace("-", " ").title(), total_params=raw_params_to_readable(sum(results_data["num_parameters"].values())), denoising_params=raw_params_to_readable(results_data["num_parameters"][denosing_module_name]), + resolution="NA", ) assert model_name not in models models[model_name] = model_info diff --git a/benchmark/diffusion/text-to-video/scripts/benchmark_one_datapoint.py b/benchmark/diffusion/text-to-video/scripts/benchmark_one_datapoint.py index 7e75928..aeca806 100644 --- a/benchmark/diffusion/text-to-video/scripts/benchmark_one_datapoint.py +++ b/benchmark/diffusion/text-to-video/scripts/benchmark_one_datapoint.py @@ -32,10 +32,10 @@ class Results: model: str num_parameters: dict[str, int] gpu_model: str - num_inference_steps: int - num_frames: int power_limit: int batch_size: int + num_inference_steps: int + num_frames: int num_prompts: int total_runtime: float = 0.0 total_energy: float = 0.0 @@ -119,7 +119,7 @@ def load_text_prompts( Returns: Total number of prompts and a list of batches of prompts. """ - dataset = json.load(open(path))["caption"] + dataset = json.load(open(path))["caption"] * 10 if num_batches is not None: if len(dataset) < num_batches * batch_size: raise ValueError("Dataset is too small for the given number of batches.") @@ -151,8 +151,8 @@ def benchmark(args: argparse.Namespace) -> None: results_dir = Path(args.result_root) / args.model results_dir.mkdir(parents=True, exist_ok=True) - benchmark_name = str(results_dir / f"bs{args.batch_size}+pl{args.power_limit}") - video_dir = results_dir / f"bs{args.batch_size}+pl{args.power_limit}+generated" + benchmark_name = str(results_dir / f"bs{args.batch_size}+pl{args.power_limit}+steps{args.num_inference_steps}") + video_dir = results_dir / f"bs{args.batch_size}+pl{args.power_limit}+steps{args.num_inference_steps}+generated" video_dir.mkdir(exist_ok=True) arg_out_filename = f"{benchmark_name}+args.json" @@ -190,7 +190,7 @@ def benchmark(args: argparse.Namespace) -> None: ] torch.cuda.reset_peak_memory_stats(device="cuda:0") - zeus_monitor.begin_window("benchmark", sync_cuda=False) + zeus_monitor.begin_window("benchmark", sync_execution=False) # Build common parameter dict for all batches params: dict[str, Any] = dict( @@ -208,15 +208,15 @@ def benchmark(args: argparse.Namespace) -> None: params["prompt"] = intermediate.prompts - zeus_monitor.begin_window("batch", sync_cuda=False) + zeus_monitor.begin_window("batch", sync_execution=False) frames = pipeline(**params).frames - batch_measurements = zeus_monitor.end_window("batch", sync_cuda=False) + batch_measurements = zeus_monitor.end_window("batch", sync_execution=False) intermediate.frames = frames intermediate.batch_latency = batch_measurements.time intermediate.batch_energy = batch_measurements.total_energy - measurements = zeus_monitor.end_window("benchmark", sync_cuda=False) + measurements = zeus_monitor.end_window("benchmark", sync_execution=False) peak_memory = torch.cuda.max_memory_allocated(device="cuda:0") results: list[Result] = [] @@ -253,10 +253,10 @@ def benchmark(args: argparse.Namespace) -> None: model=args.model, num_parameters=count_parameters(pipeline), gpu_model=gpu_model, - num_inference_steps=args.num_inference_steps, - num_frames=args.num_frames, power_limit=args.power_limit, batch_size=args.batch_size, + num_inference_steps=args.num_inference_steps, + num_frames=args.num_frames, num_prompts=num_prompts, total_runtime=measurements.time, total_energy=measurements.total_energy, diff --git a/benchmark/diffusion/text-to-video/scripts/benchmark_one_model.py b/benchmark/diffusion/text-to-video/scripts/benchmark_one_model.py index 781892f..9668515 100644 --- a/benchmark/diffusion/text-to-video/scripts/benchmark_one_model.py +++ b/benchmark/diffusion/text-to-video/scripts/benchmark_one_model.py @@ -28,44 +28,46 @@ def main(args: argparse.Namespace) -> None: print_and_write(outfile, f"Benchmarking {args.model}\n") print_and_write(outfile, f"Batch sizes: {args.batch_sizes}\n") print_and_write(outfile, f"Power limits: {args.power_limits}\n") + print_and_write(outfile, f"Number of inference steps: {args.num_inference_steps}\n") for batch_size in args.batch_sizes: for power_limit in args.power_limits: - print_and_write(outfile, f"{batch_size=}, {power_limit=}\n", flush=True) - with subprocess.Popen( - args=[ - "docker", "run", - "--gpus", '"device=' + ','.join(args.gpu_ids) + '"', - "--cap-add", "SYS_ADMIN", - "--name", f"leaderboard-t2v-{''.join(args.gpu_ids)}", - "--rm", - "-v", "/data/leaderboard/hfcache:/root/.cache/huggingface", - "-v", f"{os.getcwd()}:/workspace/text-to-video", - "mlenergy/leaderboard:diffusion-t2v", - "--result-root", args.result_root, - "--batch-size", batch_size, - "--num-batches", "10", - "--power-limit", power_limit, - "--model", args.model, - "--dataset-path", args.dataset_path, - "--huggingface-token", hf_token, - "--num-inference-steps", args.num_inference_steps, - "--num-frames", args.num_frames, - ], - stdout=subprocess.PIPE, - stderr=subprocess.STDOUT, - text=True, - ) as proc: - if proc.stdout: - i = 0 - for line in proc.stdout: - print_and_write(outfile, line, flush=i % 50 == 0) - i += 1 + for num_inference_steps in args.num_inference_steps: + print_and_write(outfile, f"{batch_size=}, {power_limit=}, {num_inference_steps=}\n", flush=True) + with subprocess.Popen( + args=[ + "docker", "run", + "--gpus", '"device=' + ','.join(args.gpu_ids) + '"', + "--cap-add", "SYS_ADMIN", + "--name", f"leaderboard-t2v-{''.join(args.gpu_ids)}", + "--rm", + "-v", "/data/leaderboard/hfcache:/root/.cache/huggingface", + "-v", f"{os.getcwd()}:/workspace/text-to-video", + "mlenergy/leaderboard:diffusion-t2v", + "--result-root", args.result_root, + "--batch-size", batch_size, + "--num-batches", "10", + "--power-limit", power_limit, + "--model", args.model, + "--dataset-path", args.dataset_path, + "--huggingface-token", hf_token, + "--num-inference-steps", num_inference_steps, + "--num-frames", args.num_frames, + ], + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + text=True, + ) as proc: + if proc.stdout: + i = 0 + for line in proc.stdout: + print_and_write(outfile, line, flush=i % 50 == 0) + i += 1 - # If proc exited with non-zero status, it's probably an OOM. - # Move on to the next batch size. - if proc.returncode != 0: - break + # If proc exited with non-zero status, it's probably an OOM. + # Move on to the next batch size. + if proc.returncode != 0: + break @@ -76,7 +78,7 @@ def main(args: argparse.Namespace) -> None: parser.add_argument("--gpu-ids", type=str, nargs="+", help="GPU IDs to use") parser.add_argument("--batch-sizes", type=str, nargs="+", default=["8", "4", "2", "1"], help="Batch sizes to benchmark") parser.add_argument("--power-limits", type=str, nargs="+", default=["400", "300", "200"], help="Power limits to benchmark") - parser.add_argument("--num-inference-steps", type=str, required=True, help="Number of denoising steps") + parser.add_argument("--num-inference-steps", type=str, nargs="+", default=["1", "2", "4", "8", "16", "25", "30", "40", "50"], help="Number of denoising steps") parser.add_argument("--num-frames", type=str, required=True, help="Number of frames to generate") parser.add_argument("--dataset-path", type=str, help="Path to the dataset JSON file.") args = parser.parse_args() diff --git a/data/diffusion/image-to-video/A100-SXM4-40GB/ali-vilab/i2vgen-xl/bs1+steps25+frames16.json b/data/diffusion/image-to-video/A100-SXM4-40GB/ali-vilab/i2vgen-xl/bs1+steps25+frames16.json index f4ef6d2..33eb52e 100644 --- a/data/diffusion/image-to-video/A100-SXM4-40GB/ali-vilab/i2vgen-xl/bs1+steps25+frames16.json +++ b/data/diffusion/image-to-video/A100-SXM4-40GB/ali-vilab/i2vgen-xl/bs1+steps25+frames16.json @@ -1,8 +1,8 @@ { "Model": "ali-vilab/i2vgen-xl", "GPU": "NVIDIA A100-SXM4-40GB", - "Energy/video (J)": 16348.217100000009, - "Batch latency (s)": 44.41898396015167, + "Energy/video (J)": 16915.850124999997, + "Batch latency (s)": 46.14208295941353, "Batch size": 1, "Denoising steps": 25, "Frames": 16 diff --git a/data/diffusion/image-to-video/A100-SXM4-40GB/ali-vilab/i2vgen-xl/bs2+steps25+frames16.json b/data/diffusion/image-to-video/A100-SXM4-40GB/ali-vilab/i2vgen-xl/bs2+steps25+frames16.json index db07d02..1b42eba 100644 --- a/data/diffusion/image-to-video/A100-SXM4-40GB/ali-vilab/i2vgen-xl/bs2+steps25+frames16.json +++ b/data/diffusion/image-to-video/A100-SXM4-40GB/ali-vilab/i2vgen-xl/bs2+steps25+frames16.json @@ -1,8 +1,8 @@ { "Model": "ali-vilab/i2vgen-xl", "GPU": "NVIDIA A100-SXM4-40GB", - "Energy/video (J)": 16091.048200000008, - "Batch latency (s)": 85.8618726491928, + "Energy/video (J)": 16496.045437499997, + "Batch latency (s)": 89.03019031882286, "Batch size": 2, "Denoising steps": 25, "Frames": 16 diff --git a/data/diffusion/image-to-video/A100-SXM4-40GB/stabilityai/stable-video-diffusion-img2vid-xt/bs1+steps25+frames25.json b/data/diffusion/image-to-video/A100-SXM4-40GB/stabilityai/stable-video-diffusion-img2vid-xt/bs1+steps25+frames25.json index 60ef57c..db242df 100644 --- a/data/diffusion/image-to-video/A100-SXM4-40GB/stabilityai/stable-video-diffusion-img2vid-xt/bs1+steps25+frames25.json +++ b/data/diffusion/image-to-video/A100-SXM4-40GB/stabilityai/stable-video-diffusion-img2vid-xt/bs1+steps25+frames25.json @@ -1,8 +1,8 @@ { "Model": "stabilityai/stable-video-diffusion-img2vid-xt", "GPU": "NVIDIA A100-SXM4-40GB", - "Energy/video (J)": 15346.527300000005, - "Batch latency (s)": 42.11920440196991, + "Energy/video (J)": 15709.767625000095, + "Batch latency (s)": 42.397395104169846, "Batch size": 1, "Denoising steps": 25, "Frames": 25 diff --git a/data/diffusion/image-to-video/A100-SXM4-40GB/stabilityai/stable-video-diffusion-img2vid-xt/bs2+steps25+frames25.json b/data/diffusion/image-to-video/A100-SXM4-40GB/stabilityai/stable-video-diffusion-img2vid-xt/bs2+steps25+frames25.json new file mode 100644 index 0000000..c3aaf60 --- /dev/null +++ b/data/diffusion/image-to-video/A100-SXM4-40GB/stabilityai/stable-video-diffusion-img2vid-xt/bs2+steps25+frames25.json @@ -0,0 +1,9 @@ +{ + "Model": "stabilityai/stable-video-diffusion-img2vid-xt", + "GPU": "NVIDIA A100-SXM4-40GB", + "Energy/video (J)": 15291.016625000047, + "Batch latency (s)": 82.90474811196327, + "Batch size": 2, + "Denoising steps": 25, + "Frames": 25 +} \ No newline at end of file diff --git a/data/diffusion/image-to-video/A100-SXM4-40GB/stabilityai/stable-video-diffusion-img2vid-xt/bs3+steps25+frames25.json b/data/diffusion/image-to-video/A100-SXM4-40GB/stabilityai/stable-video-diffusion-img2vid-xt/bs3+steps25+frames25.json new file mode 100644 index 0000000..139155f --- /dev/null +++ b/data/diffusion/image-to-video/A100-SXM4-40GB/stabilityai/stable-video-diffusion-img2vid-xt/bs3+steps25+frames25.json @@ -0,0 +1,9 @@ +{ + "Model": "stabilityai/stable-video-diffusion-img2vid-xt", + "GPU": "NVIDIA A100-SXM4-40GB", + "Energy/video (J)": 14761.389999999976, + "Batch latency (s)": 120.65004900523594, + "Batch size": 3, + "Denoising steps": 25, + "Frames": 25 +} \ No newline at end of file diff --git a/data/diffusion/image-to-video/A100-SXM4-40GB/stabilityai/stable-video-diffusion-img2vid/bs1+steps25+frames14.json b/data/diffusion/image-to-video/A100-SXM4-40GB/stabilityai/stable-video-diffusion-img2vid/bs1+steps25+frames14.json index 81599cc..280d06e 100644 --- a/data/diffusion/image-to-video/A100-SXM4-40GB/stabilityai/stable-video-diffusion-img2vid/bs1+steps25+frames14.json +++ b/data/diffusion/image-to-video/A100-SXM4-40GB/stabilityai/stable-video-diffusion-img2vid/bs1+steps25+frames14.json @@ -1,8 +1,8 @@ { "Model": "stabilityai/stable-video-diffusion-img2vid", "GPU": "NVIDIA A100-SXM4-40GB", - "Energy/video (J)": 8803.383999999985, - "Batch latency (s)": 24.10387804508209, + "Energy/video (J)": 9066.434124999912, + "Batch latency (s)": 24.369865357875824, "Batch size": 1, "Denoising steps": 25, "Frames": 14 diff --git a/data/diffusion/image-to-video/A100-SXM4-40GB/stabilityai/stable-video-diffusion-img2vid/bs2+steps25+frames14.json b/data/diffusion/image-to-video/A100-SXM4-40GB/stabilityai/stable-video-diffusion-img2vid/bs2+steps25+frames14.json new file mode 100644 index 0000000..4a33884 --- /dev/null +++ b/data/diffusion/image-to-video/A100-SXM4-40GB/stabilityai/stable-video-diffusion-img2vid/bs2+steps25+frames14.json @@ -0,0 +1,9 @@ +{ + "Model": "stabilityai/stable-video-diffusion-img2vid", + "GPU": "NVIDIA A100-SXM4-40GB", + "Energy/video (J)": 8835.22312499996, + "Batch latency (s)": 47.65615049004555, + "Batch size": 2, + "Denoising steps": 25, + "Frames": 14 +} \ No newline at end of file diff --git a/data/diffusion/image-to-video/A100-SXM4-40GB/stabilityai/stable-video-diffusion-img2vid/bs3+steps25+frames14.json b/data/diffusion/image-to-video/A100-SXM4-40GB/stabilityai/stable-video-diffusion-img2vid/bs3+steps25+frames14.json new file mode 100644 index 0000000..69fe154 --- /dev/null +++ b/data/diffusion/image-to-video/A100-SXM4-40GB/stabilityai/stable-video-diffusion-img2vid/bs3+steps25+frames14.json @@ -0,0 +1,9 @@ +{ + "Model": "stabilityai/stable-video-diffusion-img2vid", + "GPU": "NVIDIA A100-SXM4-40GB", + "Energy/video (J)": 8683.536285714292, + "Batch latency (s)": 70.55723374230521, + "Batch size": 3, + "Denoising steps": 25, + "Frames": 14 +} \ No newline at end of file diff --git a/data/diffusion/image-to-video/H100 80GB HBM3/ali-vilab/i2vgen-xl/bs1+steps25+frames16.json b/data/diffusion/image-to-video/H100 80GB HBM3/ali-vilab/i2vgen-xl/bs1+steps25+frames16.json index 458f226..24e16e9 100644 --- a/data/diffusion/image-to-video/H100 80GB HBM3/ali-vilab/i2vgen-xl/bs1+steps25+frames16.json +++ b/data/diffusion/image-to-video/H100 80GB HBM3/ali-vilab/i2vgen-xl/bs1+steps25+frames16.json @@ -1,8 +1,8 @@ { "Model": "ali-vilab/i2vgen-xl", "GPU": "NVIDIA H100 80GB HBM3", - "Energy/video (J)": 14222.658400000026, - "Batch latency (s)": 22.950254821777342, + "Energy/video (J)": 14867.419125000015, + "Batch latency (s)": 23.717748790979385, "Batch size": 1, "Denoising steps": 25, "Frames": 16 diff --git a/data/diffusion/image-to-video/H100 80GB HBM3/ali-vilab/i2vgen-xl/bs2+steps25+frames16.json b/data/diffusion/image-to-video/H100 80GB HBM3/ali-vilab/i2vgen-xl/bs2+steps25+frames16.json index b18a013..182e4da 100644 --- a/data/diffusion/image-to-video/H100 80GB HBM3/ali-vilab/i2vgen-xl/bs2+steps25+frames16.json +++ b/data/diffusion/image-to-video/H100 80GB HBM3/ali-vilab/i2vgen-xl/bs2+steps25+frames16.json @@ -1,8 +1,8 @@ { "Model": "ali-vilab/i2vgen-xl", "GPU": "NVIDIA H100 80GB HBM3", - "Energy/video (J)": 13657.628800000017, - "Batch latency (s)": 42.94859471321106, + "Energy/video (J)": 14348.508499999996, + "Batch latency (s)": 44.71498331427574, "Batch size": 2, "Denoising steps": 25, "Frames": 16 diff --git a/data/diffusion/image-to-video/H100 80GB HBM3/stabilityai/stable-video-diffusion-img2vid-xt/bs1+steps25+frames25.json b/data/diffusion/image-to-video/H100 80GB HBM3/stabilityai/stable-video-diffusion-img2vid-xt/bs1+steps25+frames25.json index d475395..c608cac 100644 --- a/data/diffusion/image-to-video/H100 80GB HBM3/stabilityai/stable-video-diffusion-img2vid-xt/bs1+steps25+frames25.json +++ b/data/diffusion/image-to-video/H100 80GB HBM3/stabilityai/stable-video-diffusion-img2vid-xt/bs1+steps25+frames25.json @@ -1,8 +1,8 @@ { "Model": "stabilityai/stable-video-diffusion-img2vid-xt", "GPU": "NVIDIA H100 80GB HBM3", - "Energy/video (J)": 13366.447699999995, - "Batch latency (s)": 20.89660472869873, + "Energy/video (J)": 13392.813624999952, + "Batch latency (s)": 20.788252592086792, "Batch size": 1, "Denoising steps": 25, "Frames": 25 diff --git a/data/diffusion/image-to-video/H100 80GB HBM3/stabilityai/stable-video-diffusion-img2vid-xt/bs2+steps25+frames25.json b/data/diffusion/image-to-video/H100 80GB HBM3/stabilityai/stable-video-diffusion-img2vid-xt/bs2+steps25+frames25.json new file mode 100644 index 0000000..34ffaa4 --- /dev/null +++ b/data/diffusion/image-to-video/H100 80GB HBM3/stabilityai/stable-video-diffusion-img2vid-xt/bs2+steps25+frames25.json @@ -0,0 +1,9 @@ +{ + "Model": "stabilityai/stable-video-diffusion-img2vid-xt", + "GPU": "NVIDIA H100 80GB HBM3", + "Energy/video (J)": 12901.83275000006, + "Batch latency (s)": 39.99498334527016, + "Batch size": 2, + "Denoising steps": 25, + "Frames": 25 +} \ No newline at end of file diff --git a/data/diffusion/image-to-video/H100 80GB HBM3/stabilityai/stable-video-diffusion-img2vid-xt/bs3+steps25+frames25.json b/data/diffusion/image-to-video/H100 80GB HBM3/stabilityai/stable-video-diffusion-img2vid-xt/bs3+steps25+frames25.json new file mode 100644 index 0000000..7b974a9 --- /dev/null +++ b/data/diffusion/image-to-video/H100 80GB HBM3/stabilityai/stable-video-diffusion-img2vid-xt/bs3+steps25+frames25.json @@ -0,0 +1,9 @@ +{ + "Model": "stabilityai/stable-video-diffusion-img2vid-xt", + "GPU": "NVIDIA H100 80GB HBM3", + "Energy/video (J)": 12790.552809523862, + "Batch latency (s)": 59.380911929266794, + "Batch size": 3, + "Denoising steps": 25, + "Frames": 25 +} \ No newline at end of file diff --git a/data/diffusion/image-to-video/H100 80GB HBM3/stabilityai/stable-video-diffusion-img2vid/bs1+steps25+frames14.json b/data/diffusion/image-to-video/H100 80GB HBM3/stabilityai/stable-video-diffusion-img2vid/bs1+steps25+frames14.json index 8c35e86..56d7638 100644 --- a/data/diffusion/image-to-video/H100 80GB HBM3/stabilityai/stable-video-diffusion-img2vid/bs1+steps25+frames14.json +++ b/data/diffusion/image-to-video/H100 80GB HBM3/stabilityai/stable-video-diffusion-img2vid/bs1+steps25+frames14.json @@ -1,8 +1,8 @@ { "Model": "stabilityai/stable-video-diffusion-img2vid", "GPU": "NVIDIA H100 80GB HBM3", - "Energy/video (J)": 7550.921200000029, - "Batch latency (s)": 12.265265846252442, + "Energy/video (J)": 7623.074500000104, + "Batch latency (s)": 12.191031396389008, "Batch size": 1, "Denoising steps": 25, "Frames": 14 diff --git a/data/diffusion/image-to-video/H100 80GB HBM3/stabilityai/stable-video-diffusion-img2vid/bs2+steps25+frames14.json b/data/diffusion/image-to-video/H100 80GB HBM3/stabilityai/stable-video-diffusion-img2vid/bs2+steps25+frames14.json new file mode 100644 index 0000000..df7125d --- /dev/null +++ b/data/diffusion/image-to-video/H100 80GB HBM3/stabilityai/stable-video-diffusion-img2vid/bs2+steps25+frames14.json @@ -0,0 +1,9 @@ +{ + "Model": "stabilityai/stable-video-diffusion-img2vid", + "GPU": "NVIDIA H100 80GB HBM3", + "Energy/video (J)": 7416.721437499975, + "Batch latency (s)": 23.368041068315506, + "Batch size": 2, + "Denoising steps": 25, + "Frames": 14 +} \ No newline at end of file diff --git a/data/diffusion/image-to-video/H100 80GB HBM3/stabilityai/stable-video-diffusion-img2vid/bs3+steps25+frames14.json b/data/diffusion/image-to-video/H100 80GB HBM3/stabilityai/stable-video-diffusion-img2vid/bs3+steps25+frames14.json new file mode 100644 index 0000000..863f0b1 --- /dev/null +++ b/data/diffusion/image-to-video/H100 80GB HBM3/stabilityai/stable-video-diffusion-img2vid/bs3+steps25+frames14.json @@ -0,0 +1,9 @@ +{ + "Model": "stabilityai/stable-video-diffusion-img2vid", + "GPU": "NVIDIA H100 80GB HBM3", + "Energy/video (J)": 7354.00133333333, + "Batch latency (s)": 34.5100462777274, + "Batch size": 3, + "Denoising steps": 25, + "Frames": 14 +} \ No newline at end of file diff --git a/data/diffusion/text-to-image/A100-SXM4-40GB/kandinsky-community/kandinsky-2-2-decoder/bs1+steps25.json b/data/diffusion/text-to-image/A100-SXM4-40GB/kandinsky-community/kandinsky-2-2-decoder/bs1+steps25.json index 3d45658..a68072a 100644 --- a/data/diffusion/text-to-image/A100-SXM4-40GB/kandinsky-community/kandinsky-2-2-decoder/bs1+steps25.json +++ b/data/diffusion/text-to-image/A100-SXM4-40GB/kandinsky-community/kandinsky-2-2-decoder/bs1+steps25.json @@ -1,8 +1,8 @@ { "Model": "kandinsky-community/kandinsky-2-2-decoder", "GPU": "NVIDIA A100-SXM4-40GB", - "Energy/image (J)": 329.6848999999929, - "Batch latency (s)": 1.808762288093567, + "Energy/image (J)": 324.06850000005215, + "Batch latency (s)": 1.6537675857543945, "Batch size": 1, "Denoising steps": 25 } \ No newline at end of file diff --git a/data/diffusion/text-to-image/A100-SXM4-40GB/kandinsky-community/kandinsky-2-2-decoder/bs16+steps25.json b/data/diffusion/text-to-image/A100-SXM4-40GB/kandinsky-community/kandinsky-2-2-decoder/bs16+steps25.json index f2c21cb..cc971fa 100644 --- a/data/diffusion/text-to-image/A100-SXM4-40GB/kandinsky-community/kandinsky-2-2-decoder/bs16+steps25.json +++ b/data/diffusion/text-to-image/A100-SXM4-40GB/kandinsky-community/kandinsky-2-2-decoder/bs16+steps25.json @@ -1,8 +1,8 @@ { "Model": "kandinsky-community/kandinsky-2-2-decoder", "GPU": "NVIDIA A100-SXM4-40GB", - "Energy/image (J)": 174.24531874999812, - "Batch latency (s)": 7.439638161659241, + "Energy/image (J)": 172.51030000000029, + "Batch latency (s)": 7.375234842300415, "Batch size": 16, "Denoising steps": 25 } \ No newline at end of file diff --git a/data/diffusion/text-to-image/A100-SXM4-40GB/kandinsky-community/kandinsky-2-2-decoder/bs2+steps25.json b/data/diffusion/text-to-image/A100-SXM4-40GB/kandinsky-community/kandinsky-2-2-decoder/bs2+steps25.json index 3eb9add..bc08770 100644 --- a/data/diffusion/text-to-image/A100-SXM4-40GB/kandinsky-community/kandinsky-2-2-decoder/bs2+steps25.json +++ b/data/diffusion/text-to-image/A100-SXM4-40GB/kandinsky-community/kandinsky-2-2-decoder/bs2+steps25.json @@ -1,8 +1,8 @@ { "Model": "kandinsky-community/kandinsky-2-2-decoder", "GPU": "NVIDIA A100-SXM4-40GB", - "Energy/image (J)": 232.40825000000186, - "Batch latency (s)": 1.640995717048645, + "Energy/image (J)": 230.3378000000026, + "Batch latency (s)": 1.5861663103103638, "Batch size": 2, "Denoising steps": 25 } \ No newline at end of file diff --git a/data/diffusion/text-to-image/A100-SXM4-40GB/kandinsky-community/kandinsky-2-2-decoder/bs32+steps25.json b/data/diffusion/text-to-image/A100-SXM4-40GB/kandinsky-community/kandinsky-2-2-decoder/bs32+steps25.json new file mode 100644 index 0000000..f7233a0 --- /dev/null +++ b/data/diffusion/text-to-image/A100-SXM4-40GB/kandinsky-community/kandinsky-2-2-decoder/bs32+steps25.json @@ -0,0 +1,8 @@ +{ + "Model": "kandinsky-community/kandinsky-2-2-decoder", + "GPU": "NVIDIA A100-SXM4-40GB", + "Energy/image (J)": 163.0797656249997, + "Batch latency (s)": 13.998618459701538, + "Batch size": 32, + "Denoising steps": 25 +} \ No newline at end of file diff --git a/data/diffusion/text-to-image/A100-SXM4-40GB/kandinsky-community/kandinsky-2-2-decoder/bs4+steps25.json b/data/diffusion/text-to-image/A100-SXM4-40GB/kandinsky-community/kandinsky-2-2-decoder/bs4+steps25.json index 9822704..2d8ce8e 100644 --- a/data/diffusion/text-to-image/A100-SXM4-40GB/kandinsky-community/kandinsky-2-2-decoder/bs4+steps25.json +++ b/data/diffusion/text-to-image/A100-SXM4-40GB/kandinsky-community/kandinsky-2-2-decoder/bs4+steps25.json @@ -1,8 +1,8 @@ { "Model": "kandinsky-community/kandinsky-2-2-decoder", "GPU": "NVIDIA A100-SXM4-40GB", - "Energy/image (J)": 202.8745750000002, - "Batch latency (s)": 2.3463359832763673, + "Energy/image (J)": 200.16462499999906, + "Batch latency (s)": 2.299217462539673, "Batch size": 4, "Denoising steps": 25 } \ No newline at end of file diff --git a/data/diffusion/text-to-image/A100-SXM4-40GB/kandinsky-community/kandinsky-2-2-decoder/bs8+steps25.json b/data/diffusion/text-to-image/A100-SXM4-40GB/kandinsky-community/kandinsky-2-2-decoder/bs8+steps25.json index 0c9b9f0..37450b5 100644 --- a/data/diffusion/text-to-image/A100-SXM4-40GB/kandinsky-community/kandinsky-2-2-decoder/bs8+steps25.json +++ b/data/diffusion/text-to-image/A100-SXM4-40GB/kandinsky-community/kandinsky-2-2-decoder/bs8+steps25.json @@ -1,8 +1,8 @@ { "Model": "kandinsky-community/kandinsky-2-2-decoder", "GPU": "NVIDIA A100-SXM4-40GB", - "Energy/image (J)": 187.65767500000075, - "Batch latency (s)": 4.030062103271485, + "Energy/image (J)": 184.9021625000052, + "Batch latency (s)": 4.0124232292175295, "Batch size": 8, "Denoising steps": 25 } \ No newline at end of file diff --git a/data/diffusion/text-to-image/A100-SXM4-40GB/kandinsky-community/kandinsky-3/bs1+steps25.json b/data/diffusion/text-to-image/A100-SXM4-40GB/kandinsky-community/kandinsky-3/bs1+steps25.json index 2f24dd0..f04f1d9 100644 --- a/data/diffusion/text-to-image/A100-SXM4-40GB/kandinsky-community/kandinsky-3/bs1+steps25.json +++ b/data/diffusion/text-to-image/A100-SXM4-40GB/kandinsky-community/kandinsky-3/bs1+steps25.json @@ -1,8 +1,8 @@ { "Model": "kandinsky-community/kandinsky-3", "GPU": "NVIDIA A100-SXM4-40GB", - "Energy/image (J)": 914.0325000000187, - "Batch latency (s)": 3.1329710721969604, + "Energy/image (J)": 930.2532999999821, + "Batch latency (s)": 3.0359585523605346, "Batch size": 1, "Denoising steps": 25 } \ No newline at end of file diff --git a/data/diffusion/text-to-image/A100-SXM4-40GB/kandinsky-community/kandinsky-3/bs2+steps25.json b/data/diffusion/text-to-image/A100-SXM4-40GB/kandinsky-community/kandinsky-3/bs2+steps25.json index 64d0878..76aa047 100644 --- a/data/diffusion/text-to-image/A100-SXM4-40GB/kandinsky-community/kandinsky-3/bs2+steps25.json +++ b/data/diffusion/text-to-image/A100-SXM4-40GB/kandinsky-community/kandinsky-3/bs2+steps25.json @@ -1,8 +1,8 @@ { "Model": "kandinsky-community/kandinsky-3", "GPU": "NVIDIA A100-SXM4-40GB", - "Energy/image (J)": 875.4787999999942, - "Batch latency (s)": 5.2747025966644285, + "Energy/image (J)": 895.7575500000036, + "Batch latency (s)": 5.261959171295166, "Batch size": 2, "Denoising steps": 25 } \ No newline at end of file diff --git a/data/diffusion/text-to-image/A100-SXM4-40GB/prompthero/openjourney-v4/bs1+steps25.json b/data/diffusion/text-to-image/A100-SXM4-40GB/prompthero/openjourney-v4/bs1+steps25.json index f6f9bd5..0c9ec1d 100644 --- a/data/diffusion/text-to-image/A100-SXM4-40GB/prompthero/openjourney-v4/bs1+steps25.json +++ b/data/diffusion/text-to-image/A100-SXM4-40GB/prompthero/openjourney-v4/bs1+steps25.json @@ -1,8 +1,8 @@ { "Model": "prompthero/openjourney-v4", "GPU": "NVIDIA A100-SXM4-40GB", - "Energy/image (J)": 235.712099999981, - "Batch latency (s)": 1.0208970069885255, + "Energy/image (J)": 227.21699999999254, + "Batch latency (s)": 0.9210062503814698, "Batch size": 1, "Denoising steps": 25 } \ No newline at end of file diff --git a/data/diffusion/text-to-image/A100-SXM4-40GB/prompthero/openjourney-v4/bs16+steps25.json b/data/diffusion/text-to-image/A100-SXM4-40GB/prompthero/openjourney-v4/bs16+steps25.json index aad1d2e..30469a4 100644 --- a/data/diffusion/text-to-image/A100-SXM4-40GB/prompthero/openjourney-v4/bs16+steps25.json +++ b/data/diffusion/text-to-image/A100-SXM4-40GB/prompthero/openjourney-v4/bs16+steps25.json @@ -1,8 +1,8 @@ { "Model": "prompthero/openjourney-v4", "GPU": "NVIDIA A100-SXM4-40GB", - "Energy/image (J)": 157.4185124999989, - "Batch latency (s)": 6.579187059402466, + "Energy/image (J)": 156.51368749999673, + "Batch latency (s)": 6.559858226776123, "Batch size": 16, "Denoising steps": 25 } \ No newline at end of file diff --git a/data/diffusion/text-to-image/A100-SXM4-40GB/prompthero/openjourney-v4/bs2+steps25.json b/data/diffusion/text-to-image/A100-SXM4-40GB/prompthero/openjourney-v4/bs2+steps25.json index 28e86a3..ca6fd19 100644 --- a/data/diffusion/text-to-image/A100-SXM4-40GB/prompthero/openjourney-v4/bs2+steps25.json +++ b/data/diffusion/text-to-image/A100-SXM4-40GB/prompthero/openjourney-v4/bs2+steps25.json @@ -1,8 +1,8 @@ { "Model": "prompthero/openjourney-v4", "GPU": "NVIDIA A100-SXM4-40GB", - "Energy/image (J)": 196.30995000000112, - "Batch latency (s)": 1.1641260623931884, + "Energy/image (J)": 188.78500000000932, + "Batch latency (s)": 1.1187455892562865, "Batch size": 2, "Denoising steps": 25 } \ No newline at end of file diff --git a/data/diffusion/text-to-image/A100-SXM4-40GB/prompthero/openjourney-v4/bs32+steps25.json b/data/diffusion/text-to-image/A100-SXM4-40GB/prompthero/openjourney-v4/bs32+steps25.json new file mode 100644 index 0000000..6be148d --- /dev/null +++ b/data/diffusion/text-to-image/A100-SXM4-40GB/prompthero/openjourney-v4/bs32+steps25.json @@ -0,0 +1,8 @@ +{ + "Model": "prompthero/openjourney-v4", + "GPU": "NVIDIA A100-SXM4-40GB", + "Energy/image (J)": 154.23499999999768, + "Batch latency (s)": 12.850126147270203, + "Batch size": 32, + "Denoising steps": 25 +} \ No newline at end of file diff --git a/data/diffusion/text-to-image/A100-SXM4-40GB/prompthero/openjourney-v4/bs4+steps25.json b/data/diffusion/text-to-image/A100-SXM4-40GB/prompthero/openjourney-v4/bs4+steps25.json index 4eb8c05..03b5558 100644 --- a/data/diffusion/text-to-image/A100-SXM4-40GB/prompthero/openjourney-v4/bs4+steps25.json +++ b/data/diffusion/text-to-image/A100-SXM4-40GB/prompthero/openjourney-v4/bs4+steps25.json @@ -1,8 +1,8 @@ { "Model": "prompthero/openjourney-v4", "GPU": "NVIDIA A100-SXM4-40GB", - "Energy/image (J)": 177.43804999999702, - "Batch latency (s)": 1.884285831451416, + "Energy/image (J)": 175.33082500000017, + "Batch latency (s)": 1.8664743423461914, "Batch size": 4, "Denoising steps": 25 } \ No newline at end of file diff --git a/data/diffusion/text-to-image/A100-SXM4-40GB/prompthero/openjourney-v4/bs64+steps25.json b/data/diffusion/text-to-image/A100-SXM4-40GB/prompthero/openjourney-v4/bs64+steps25.json new file mode 100644 index 0000000..ef6d36f --- /dev/null +++ b/data/diffusion/text-to-image/A100-SXM4-40GB/prompthero/openjourney-v4/bs64+steps25.json @@ -0,0 +1,8 @@ +{ + "Model": "prompthero/openjourney-v4", + "GPU": "NVIDIA A100-SXM4-40GB", + "Energy/image (J)": 150.57691875000017, + "Batch latency (s)": 25.000647592544556, + "Batch size": 64, + "Denoising steps": 25 +} \ No newline at end of file diff --git a/data/diffusion/text-to-image/A100-SXM4-40GB/prompthero/openjourney-v4/bs8+steps25.json b/data/diffusion/text-to-image/A100-SXM4-40GB/prompthero/openjourney-v4/bs8+steps25.json index e101e6e..b91c187 100644 --- a/data/diffusion/text-to-image/A100-SXM4-40GB/prompthero/openjourney-v4/bs8+steps25.json +++ b/data/diffusion/text-to-image/A100-SXM4-40GB/prompthero/openjourney-v4/bs8+steps25.json @@ -1,8 +1,8 @@ { "Model": "prompthero/openjourney-v4", "GPU": "NVIDIA A100-SXM4-40GB", - "Energy/image (J)": 162.92667500000098, - "Batch latency (s)": 3.505508875846863, + "Energy/image (J)": 163.7534500000067, + "Batch latency (s)": 3.423132634162903, "Batch size": 8, "Denoising steps": 25 } \ No newline at end of file diff --git a/data/diffusion/text-to-image/A100-SXM4-40GB/segmind/SSD-1B/bs1+steps25.json b/data/diffusion/text-to-image/A100-SXM4-40GB/segmind/SSD-1B/bs1+steps25.json index 5021be5..b5beef6 100644 --- a/data/diffusion/text-to-image/A100-SXM4-40GB/segmind/SSD-1B/bs1+steps25.json +++ b/data/diffusion/text-to-image/A100-SXM4-40GB/segmind/SSD-1B/bs1+steps25.json @@ -1,8 +1,8 @@ { "Model": "segmind/SSD-1B", "GPU": "NVIDIA A100-SXM4-40GB", - "Energy/image (J)": 717.2012000000104, - "Batch latency (s)": 1.9508831262588502, + "Energy/image (J)": 745.7899999999441, + "Batch latency (s)": 1.9644724607467652, "Batch size": 1, "Denoising steps": 25 } \ No newline at end of file diff --git a/data/diffusion/text-to-image/A100-SXM4-40GB/segmind/SSD-1B/bs2+steps25.json b/data/diffusion/text-to-image/A100-SXM4-40GB/segmind/SSD-1B/bs2+steps25.json index 4ea6ea5..d21b87b 100644 --- a/data/diffusion/text-to-image/A100-SXM4-40GB/segmind/SSD-1B/bs2+steps25.json +++ b/data/diffusion/text-to-image/A100-SXM4-40GB/segmind/SSD-1B/bs2+steps25.json @@ -1,8 +1,8 @@ { "Model": "segmind/SSD-1B", "GPU": "NVIDIA A100-SXM4-40GB", - "Energy/image (J)": 681.1273499999894, - "Batch latency (s)": 3.633535361289978, + "Energy/image (J)": 700.4580500000156, + "Batch latency (s)": 3.6897377252578734, "Batch size": 2, "Denoising steps": 25 } \ No newline at end of file diff --git a/data/diffusion/text-to-image/A100-SXM4-40GB/segmind/SSD-1B/bs4+steps25.json b/data/diffusion/text-to-image/A100-SXM4-40GB/segmind/SSD-1B/bs4+steps25.json index c9084a0..be83817 100644 --- a/data/diffusion/text-to-image/A100-SXM4-40GB/segmind/SSD-1B/bs4+steps25.json +++ b/data/diffusion/text-to-image/A100-SXM4-40GB/segmind/SSD-1B/bs4+steps25.json @@ -1,8 +1,8 @@ { "Model": "segmind/SSD-1B", "GPU": "NVIDIA A100-SXM4-40GB", - "Energy/image (J)": 672.6853499999968, - "Batch latency (s)": 7.193562436103821, + "Energy/image (J)": 688.6121250000084, + "Batch latency (s)": 7.168970584869385, "Batch size": 4, "Denoising steps": 25 } \ No newline at end of file diff --git a/data/diffusion/text-to-image/A100-SXM4-40GB/segmind/SSD-1B/bs8+steps25.json b/data/diffusion/text-to-image/A100-SXM4-40GB/segmind/SSD-1B/bs8+steps25.json index 86c82bd..a629442 100644 --- a/data/diffusion/text-to-image/A100-SXM4-40GB/segmind/SSD-1B/bs8+steps25.json +++ b/data/diffusion/text-to-image/A100-SXM4-40GB/segmind/SSD-1B/bs8+steps25.json @@ -1,8 +1,8 @@ { "Model": "segmind/SSD-1B", "GPU": "NVIDIA A100-SXM4-40GB", - "Energy/image (J)": 688.7974999999976, - "Batch latency (s)": 14.561952710151672, + "Energy/image (J)": 697.7047875000047, + "Batch latency (s)": 14.703205680847168, "Batch size": 8, "Denoising steps": 25 } \ No newline at end of file diff --git a/data/diffusion/text-to-image/A100-SXM4-40GB/stabilityai/sdxl-turbo/bs1+steps25.json b/data/diffusion/text-to-image/A100-SXM4-40GB/stabilityai/sdxl-turbo/bs1+steps25.json index fb90e6d..df948b1 100644 --- a/data/diffusion/text-to-image/A100-SXM4-40GB/stabilityai/sdxl-turbo/bs1+steps25.json +++ b/data/diffusion/text-to-image/A100-SXM4-40GB/stabilityai/sdxl-turbo/bs1+steps25.json @@ -1,8 +1,8 @@ { "Model": "stabilityai/sdxl-turbo", "GPU": "NVIDIA A100-SXM4-40GB", - "Energy/image (J)": 372.9794999999925, - "Batch latency (s)": 2.0116413831710815, + "Energy/image (J)": 414.02970000002534, + "Batch latency (s)": 2.0992990016937254, "Batch size": 1, "Denoising steps": 25 } \ No newline at end of file diff --git a/data/diffusion/text-to-image/A100-SXM4-40GB/stabilityai/sdxl-turbo/bs16+steps25.json b/data/diffusion/text-to-image/A100-SXM4-40GB/stabilityai/sdxl-turbo/bs16+steps25.json index 8c4ec6a..402ce40 100644 --- a/data/diffusion/text-to-image/A100-SXM4-40GB/stabilityai/sdxl-turbo/bs16+steps25.json +++ b/data/diffusion/text-to-image/A100-SXM4-40GB/stabilityai/sdxl-turbo/bs16+steps25.json @@ -1,8 +1,8 @@ { "Model": "stabilityai/sdxl-turbo", "GPU": "NVIDIA A100-SXM4-40GB", - "Energy/image (J)": 234.4104250000004, - "Batch latency (s)": 9.666603064537048, + "Energy/image (J)": 242.709375, + "Batch latency (s)": 9.941586756706238, "Batch size": 16, "Denoising steps": 25 } \ No newline at end of file diff --git a/data/diffusion/text-to-image/A100-SXM4-40GB/stabilityai/sdxl-turbo/bs2+steps25.json b/data/diffusion/text-to-image/A100-SXM4-40GB/stabilityai/sdxl-turbo/bs2+steps25.json index 8dd4789..95f0f83 100644 --- a/data/diffusion/text-to-image/A100-SXM4-40GB/stabilityai/sdxl-turbo/bs2+steps25.json +++ b/data/diffusion/text-to-image/A100-SXM4-40GB/stabilityai/sdxl-turbo/bs2+steps25.json @@ -1,8 +1,8 @@ { "Model": "stabilityai/sdxl-turbo", "GPU": "NVIDIA A100-SXM4-40GB", - "Energy/image (J)": 289.37170000000623, - "Batch latency (s)": 2.0955519914627074, + "Energy/image (J)": 343.14144999999553, + "Batch latency (s)": 2.6075665235519407, "Batch size": 2, "Denoising steps": 25 } \ No newline at end of file diff --git a/data/diffusion/text-to-image/A100-SXM4-40GB/stabilityai/sdxl-turbo/bs4+steps25.json b/data/diffusion/text-to-image/A100-SXM4-40GB/stabilityai/sdxl-turbo/bs4+steps25.json index 8f97b2d..b6565c8 100644 --- a/data/diffusion/text-to-image/A100-SXM4-40GB/stabilityai/sdxl-turbo/bs4+steps25.json +++ b/data/diffusion/text-to-image/A100-SXM4-40GB/stabilityai/sdxl-turbo/bs4+steps25.json @@ -1,8 +1,8 @@ { "Model": "stabilityai/sdxl-turbo", "GPU": "NVIDIA A100-SXM4-40GB", - "Energy/image (J)": 249.84987499999698, - "Batch latency (s)": 2.723399114608765, + "Energy/image (J)": 260.6321250000037, + "Batch latency (s)": 2.6943087577819824, "Batch size": 4, "Denoising steps": 25 } \ No newline at end of file diff --git a/data/diffusion/text-to-image/A100-SXM4-40GB/stabilityai/sdxl-turbo/bs8+steps25.json b/data/diffusion/text-to-image/A100-SXM4-40GB/stabilityai/sdxl-turbo/bs8+steps25.json index b5914d7..1e3b03b 100644 --- a/data/diffusion/text-to-image/A100-SXM4-40GB/stabilityai/sdxl-turbo/bs8+steps25.json +++ b/data/diffusion/text-to-image/A100-SXM4-40GB/stabilityai/sdxl-turbo/bs8+steps25.json @@ -1,8 +1,8 @@ { "Model": "stabilityai/sdxl-turbo", "GPU": "NVIDIA A100-SXM4-40GB", - "Energy/image (J)": 231.1957374999998, - "Batch latency (s)": 4.824169707298279, + "Energy/image (J)": 239.71523749999469, + "Batch latency (s)": 4.928032088279724, "Batch size": 8, "Denoising steps": 25 } \ No newline at end of file diff --git a/data/diffusion/text-to-image/A100-SXM4-40GB/stabilityai/stable-diffusion-2-1/bs1+steps25.json b/data/diffusion/text-to-image/A100-SXM4-40GB/stabilityai/stable-diffusion-2-1/bs1+steps25.json index a3f9bd7..54b6a61 100644 --- a/data/diffusion/text-to-image/A100-SXM4-40GB/stabilityai/stable-diffusion-2-1/bs1+steps25.json +++ b/data/diffusion/text-to-image/A100-SXM4-40GB/stabilityai/stable-diffusion-2-1/bs1+steps25.json @@ -1,8 +1,8 @@ { "Model": "stabilityai/stable-diffusion-2-1", "GPU": "NVIDIA A100-SXM4-40GB", - "Energy/image (J)": 437.2461000000127, - "Batch latency (s)": 1.1940542221069337, + "Energy/image (J)": 431.7285000000149, + "Batch latency (s)": 1.1978053092956542, "Batch size": 1, "Denoising steps": 25 } \ No newline at end of file diff --git a/data/diffusion/text-to-image/A100-SXM4-40GB/stabilityai/stable-diffusion-2-1/bs16+steps25.json b/data/diffusion/text-to-image/A100-SXM4-40GB/stabilityai/stable-diffusion-2-1/bs16+steps25.json index 0e2147c..7250162 100644 --- a/data/diffusion/text-to-image/A100-SXM4-40GB/stabilityai/stable-diffusion-2-1/bs16+steps25.json +++ b/data/diffusion/text-to-image/A100-SXM4-40GB/stabilityai/stable-diffusion-2-1/bs16+steps25.json @@ -1,8 +1,8 @@ { "Model": "stabilityai/stable-diffusion-2-1", "GPU": "NVIDIA A100-SXM4-40GB", - "Energy/image (J)": 345.6643937500019, - "Batch latency (s)": 14.331708741188049, + "Energy/image (J)": 349.6556749999989, + "Batch latency (s)": 14.506024074554443, "Batch size": 16, "Denoising steps": 25 } \ No newline at end of file diff --git a/data/diffusion/text-to-image/A100-SXM4-40GB/stabilityai/stable-diffusion-2-1/bs2+steps25.json b/data/diffusion/text-to-image/A100-SXM4-40GB/stabilityai/stable-diffusion-2-1/bs2+steps25.json index b22ad07..cdbff65 100644 --- a/data/diffusion/text-to-image/A100-SXM4-40GB/stabilityai/stable-diffusion-2-1/bs2+steps25.json +++ b/data/diffusion/text-to-image/A100-SXM4-40GB/stabilityai/stable-diffusion-2-1/bs2+steps25.json @@ -1,8 +1,8 @@ { "Model": "stabilityai/stable-diffusion-2-1", "GPU": "NVIDIA A100-SXM4-40GB", - "Energy/image (J)": 397.6420500000007, - "Batch latency (s)": 2.0922271490097044, + "Energy/image (J)": 397.4403999999631, + "Batch latency (s)": 2.0987526416778564, "Batch size": 2, "Denoising steps": 25 } \ No newline at end of file diff --git a/data/diffusion/text-to-image/A100-SXM4-40GB/stabilityai/stable-diffusion-2-1/bs32+steps25.json b/data/diffusion/text-to-image/A100-SXM4-40GB/stabilityai/stable-diffusion-2-1/bs32+steps25.json new file mode 100644 index 0000000..97eb38f --- /dev/null +++ b/data/diffusion/text-to-image/A100-SXM4-40GB/stabilityai/stable-diffusion-2-1/bs32+steps25.json @@ -0,0 +1,8 @@ +{ + "Model": "stabilityai/stable-diffusion-2-1", + "GPU": "NVIDIA A100-SXM4-40GB", + "Energy/image (J)": 344.0007781249995, + "Batch latency (s)": 28.606084370613097, + "Batch size": 32, + "Denoising steps": 25 +} \ No newline at end of file diff --git a/data/diffusion/text-to-image/A100-SXM4-40GB/stabilityai/stable-diffusion-2-1/bs4+steps25.json b/data/diffusion/text-to-image/A100-SXM4-40GB/stabilityai/stable-diffusion-2-1/bs4+steps25.json index 1037a7f..9b73041 100644 --- a/data/diffusion/text-to-image/A100-SXM4-40GB/stabilityai/stable-diffusion-2-1/bs4+steps25.json +++ b/data/diffusion/text-to-image/A100-SXM4-40GB/stabilityai/stable-diffusion-2-1/bs4+steps25.json @@ -1,8 +1,8 @@ { "Model": "stabilityai/stable-diffusion-2-1", "GPU": "NVIDIA A100-SXM4-40GB", - "Energy/image (J)": 369.5769999999902, - "Batch latency (s)": 3.833626127243042, + "Energy/image (J)": 370.8419500000076, + "Batch latency (s)": 3.870126795768738, "Batch size": 4, "Denoising steps": 25 } \ No newline at end of file diff --git a/data/diffusion/text-to-image/A100-SXM4-40GB/stabilityai/stable-diffusion-2-1/bs8+steps25.json b/data/diffusion/text-to-image/A100-SXM4-40GB/stabilityai/stable-diffusion-2-1/bs8+steps25.json index 58ffc43..0ca5b1a 100644 --- a/data/diffusion/text-to-image/A100-SXM4-40GB/stabilityai/stable-diffusion-2-1/bs8+steps25.json +++ b/data/diffusion/text-to-image/A100-SXM4-40GB/stabilityai/stable-diffusion-2-1/bs8+steps25.json @@ -1,8 +1,8 @@ { "Model": "stabilityai/stable-diffusion-2-1", "GPU": "NVIDIA A100-SXM4-40GB", - "Energy/image (J)": 352.8435999999987, - "Batch latency (s)": 7.322762203216553, + "Energy/image (J)": 357.5101125000045, + "Batch latency (s)": 7.4118963241577145, "Batch size": 8, "Denoising steps": 25 } \ No newline at end of file diff --git a/data/diffusion/text-to-image/A100-SXM4-40GB/stabilityai/stable-diffusion-3-medium-diffusers/bs1+steps25.json b/data/diffusion/text-to-image/A100-SXM4-40GB/stabilityai/stable-diffusion-3-medium-diffusers/bs1+steps25.json index 0f953f7..f99e6da 100644 --- a/data/diffusion/text-to-image/A100-SXM4-40GB/stabilityai/stable-diffusion-3-medium-diffusers/bs1+steps25.json +++ b/data/diffusion/text-to-image/A100-SXM4-40GB/stabilityai/stable-diffusion-3-medium-diffusers/bs1+steps25.json @@ -1,8 +1,8 @@ { "Model": "stabilityai/stable-diffusion-3-medium-diffusers", "GPU": "NVIDIA A100-SXM4-40GB", - "Energy/image (J)": 1447.3444000000134, - "Batch latency (s)": 3.7704660654067994, + "Energy/image (J)": 1457.9797000000253, + "Batch latency (s)": 3.7812204360961914, "Batch size": 1, "Denoising steps": 25 } \ No newline at end of file diff --git a/data/diffusion/text-to-image/A100-SXM4-40GB/stabilityai/stable-diffusion-3-medium-diffusers/bs2+steps25.json b/data/diffusion/text-to-image/A100-SXM4-40GB/stabilityai/stable-diffusion-3-medium-diffusers/bs2+steps25.json index a9b8b27..bc363cc 100644 --- a/data/diffusion/text-to-image/A100-SXM4-40GB/stabilityai/stable-diffusion-3-medium-diffusers/bs2+steps25.json +++ b/data/diffusion/text-to-image/A100-SXM4-40GB/stabilityai/stable-diffusion-3-medium-diffusers/bs2+steps25.json @@ -1,8 +1,8 @@ { "Model": "stabilityai/stable-diffusion-3-medium-diffusers", "GPU": "NVIDIA A100-SXM4-40GB", - "Energy/image (J)": 1398.8784500000068, - "Batch latency (s)": 7.196404767036438, + "Energy/image (J)": 1417.0265999999829, + "Batch latency (s)": 7.296204352378846, "Batch size": 2, "Denoising steps": 25 } \ No newline at end of file diff --git a/data/diffusion/text-to-image/A100-SXM4-40GB/stabilityai/stable-diffusion-3-medium-diffusers/bs4+steps25.json b/data/diffusion/text-to-image/A100-SXM4-40GB/stabilityai/stable-diffusion-3-medium-diffusers/bs4+steps25.json index 59b084e..738121c 100644 --- a/data/diffusion/text-to-image/A100-SXM4-40GB/stabilityai/stable-diffusion-3-medium-diffusers/bs4+steps25.json +++ b/data/diffusion/text-to-image/A100-SXM4-40GB/stabilityai/stable-diffusion-3-medium-diffusers/bs4+steps25.json @@ -1,8 +1,8 @@ { "Model": "stabilityai/stable-diffusion-3-medium-diffusers", "GPU": "NVIDIA A100-SXM4-40GB", - "Energy/image (J)": 1340.9431999999913, - "Batch latency (s)": 13.802976179122926, + "Energy/image (J)": 1376.6305249999975, + "Batch latency (s)": 14.180507826805115, "Batch size": 4, "Denoising steps": 25 } \ No newline at end of file diff --git a/data/diffusion/text-to-image/A100-SXM4-40GB/stabilityai/stable-diffusion-3-medium-diffusers/bs8+steps25.json b/data/diffusion/text-to-image/A100-SXM4-40GB/stabilityai/stable-diffusion-3-medium-diffusers/bs8+steps25.json index 2be9b99..c4e504d 100644 --- a/data/diffusion/text-to-image/A100-SXM4-40GB/stabilityai/stable-diffusion-3-medium-diffusers/bs8+steps25.json +++ b/data/diffusion/text-to-image/A100-SXM4-40GB/stabilityai/stable-diffusion-3-medium-diffusers/bs8+steps25.json @@ -1,8 +1,8 @@ { "Model": "stabilityai/stable-diffusion-3-medium-diffusers", "GPU": "NVIDIA A100-SXM4-40GB", - "Energy/image (J)": 1340.4551500000002, - "Batch latency (s)": 27.784875440597535, + "Energy/image (J)": 1353.8191374999938, + "Batch latency (s)": 28.03936712741852, "Batch size": 8, "Denoising steps": 25 } \ No newline at end of file diff --git a/data/diffusion/text-to-image/A100-SXM4-40GB/stabilityai/stable-diffusion-xl-base-1.0/bs1+steps25.json b/data/diffusion/text-to-image/A100-SXM4-40GB/stabilityai/stable-diffusion-xl-base-1.0/bs1+steps25.json index dd1047d..a5f4b22 100644 --- a/data/diffusion/text-to-image/A100-SXM4-40GB/stabilityai/stable-diffusion-xl-base-1.0/bs1+steps25.json +++ b/data/diffusion/text-to-image/A100-SXM4-40GB/stabilityai/stable-diffusion-xl-base-1.0/bs1+steps25.json @@ -1,8 +1,8 @@ { "Model": "stabilityai/stable-diffusion-xl-base-1.0", "GPU": "NVIDIA A100-SXM4-40GB", - "Energy/image (J)": 1064.0938000000083, - "Batch latency (s)": 2.820074677467346, + "Energy/image (J)": 1104.114100000076, + "Batch latency (s)": 2.8582629680633547, "Batch size": 1, "Denoising steps": 25 } \ No newline at end of file diff --git a/data/diffusion/text-to-image/A100-SXM4-40GB/stabilityai/stable-diffusion-xl-base-1.0/bs2+steps25.json b/data/diffusion/text-to-image/A100-SXM4-40GB/stabilityai/stable-diffusion-xl-base-1.0/bs2+steps25.json index 2201d30..c2cca9a 100644 --- a/data/diffusion/text-to-image/A100-SXM4-40GB/stabilityai/stable-diffusion-xl-base-1.0/bs2+steps25.json +++ b/data/diffusion/text-to-image/A100-SXM4-40GB/stabilityai/stable-diffusion-xl-base-1.0/bs2+steps25.json @@ -1,8 +1,8 @@ { "Model": "stabilityai/stable-diffusion-xl-base-1.0", "GPU": "NVIDIA A100-SXM4-40GB", - "Energy/image (J)": 994.3445500000147, - "Batch latency (s)": 5.212948894500732, + "Energy/image (J)": 1023.2370500000194, + "Batch latency (s)": 5.309733629226685, "Batch size": 2, "Denoising steps": 25 } \ No newline at end of file diff --git a/data/diffusion/text-to-image/A100-SXM4-40GB/stabilityai/stable-diffusion-xl-base-1.0/bs4+steps25.json b/data/diffusion/text-to-image/A100-SXM4-40GB/stabilityai/stable-diffusion-xl-base-1.0/bs4+steps25.json index 9defeb3..626ce8f 100644 --- a/data/diffusion/text-to-image/A100-SXM4-40GB/stabilityai/stable-diffusion-xl-base-1.0/bs4+steps25.json +++ b/data/diffusion/text-to-image/A100-SXM4-40GB/stabilityai/stable-diffusion-xl-base-1.0/bs4+steps25.json @@ -1,8 +1,8 @@ { "Model": "stabilityai/stable-diffusion-xl-base-1.0", "GPU": "NVIDIA A100-SXM4-40GB", - "Energy/image (J)": 1004.3355749999988, - "Batch latency (s)": 10.405498218536376, + "Energy/image (J)": 1028.9778500000016, + "Batch latency (s)": 10.595553398132324, "Batch size": 4, "Denoising steps": 25 } \ No newline at end of file diff --git a/data/diffusion/text-to-image/A100-SXM4-40GB/stabilityai/stable-diffusion-xl-base-1.0/bs8+steps25.json b/data/diffusion/text-to-image/A100-SXM4-40GB/stabilityai/stable-diffusion-xl-base-1.0/bs8+steps25.json index fa520af..6e9c732 100644 --- a/data/diffusion/text-to-image/A100-SXM4-40GB/stabilityai/stable-diffusion-xl-base-1.0/bs8+steps25.json +++ b/data/diffusion/text-to-image/A100-SXM4-40GB/stabilityai/stable-diffusion-xl-base-1.0/bs8+steps25.json @@ -1,8 +1,8 @@ { "Model": "stabilityai/stable-diffusion-xl-base-1.0", "GPU": "NVIDIA A100-SXM4-40GB", - "Energy/image (J)": 1010.2810624999984, - "Batch latency (s)": 21.15771155357361, + "Energy/image (J)": 1039.4479500000016, + "Batch latency (s)": 21.538306522369385, "Batch size": 8, "Denoising steps": 25 } \ No newline at end of file diff --git a/data/diffusion/text-to-image/H100 80GB HBM3/kandinsky-community/kandinsky-2-2-decoder/bs1+steps25.json b/data/diffusion/text-to-image/H100 80GB HBM3/kandinsky-community/kandinsky-2-2-decoder/bs1+steps25.json index 75b4f19..c7ea97a 100644 --- a/data/diffusion/text-to-image/H100 80GB HBM3/kandinsky-community/kandinsky-2-2-decoder/bs1+steps25.json +++ b/data/diffusion/text-to-image/H100 80GB HBM3/kandinsky-community/kandinsky-2-2-decoder/bs1+steps25.json @@ -1,8 +1,8 @@ { "Model": "kandinsky-community/kandinsky-2-2-decoder", "GPU": "NVIDIA H100 80GB HBM3", - "Energy/image (J)": 329.0941999999806, - "Batch latency (s)": 1.3033519506454467, + "Energy/image (J)": 316.22510000001637, + "Batch latency (s)": 1.2899317026138306, "Batch size": 1, "Denoising steps": 25 } \ No newline at end of file diff --git a/data/diffusion/text-to-image/H100 80GB HBM3/kandinsky-community/kandinsky-2-2-decoder/bs16+steps25.json b/data/diffusion/text-to-image/H100 80GB HBM3/kandinsky-community/kandinsky-2-2-decoder/bs16+steps25.json index c016517..98ce74a 100644 --- a/data/diffusion/text-to-image/H100 80GB HBM3/kandinsky-community/kandinsky-2-2-decoder/bs16+steps25.json +++ b/data/diffusion/text-to-image/H100 80GB HBM3/kandinsky-community/kandinsky-2-2-decoder/bs16+steps25.json @@ -1,8 +1,8 @@ { "Model": "kandinsky-community/kandinsky-2-2-decoder", "GPU": "NVIDIA H100 80GB HBM3", - "Energy/image (J)": 145.14649999999966, - "Batch latency (s)": 3.6811126232147218, + "Energy/image (J)": 145.30115625000326, + "Batch latency (s)": 3.6781134366989137, "Batch size": 16, "Denoising steps": 25 } \ No newline at end of file diff --git a/data/diffusion/text-to-image/H100 80GB HBM3/kandinsky-community/kandinsky-2-2-decoder/bs2+steps25.json b/data/diffusion/text-to-image/H100 80GB HBM3/kandinsky-community/kandinsky-2-2-decoder/bs2+steps25.json index 69e577a..82e2c3d 100644 --- a/data/diffusion/text-to-image/H100 80GB HBM3/kandinsky-community/kandinsky-2-2-decoder/bs2+steps25.json +++ b/data/diffusion/text-to-image/H100 80GB HBM3/kandinsky-community/kandinsky-2-2-decoder/bs2+steps25.json @@ -1,8 +1,8 @@ { "Model": "kandinsky-community/kandinsky-2-2-decoder", "GPU": "NVIDIA H100 80GB HBM3", - "Energy/image (J)": 222.06345000001602, - "Batch latency (s)": 1.374358630180359, + "Energy/image (J)": 221.32535000001081, + "Batch latency (s)": 1.3821177244186402, "Batch size": 2, "Denoising steps": 25 } \ No newline at end of file diff --git a/data/diffusion/text-to-image/H100 80GB HBM3/kandinsky-community/kandinsky-2-2-decoder/bs32+steps25.json b/data/diffusion/text-to-image/H100 80GB HBM3/kandinsky-community/kandinsky-2-2-decoder/bs32+steps25.json index 76375f5..a31efe8 100644 --- a/data/diffusion/text-to-image/H100 80GB HBM3/kandinsky-community/kandinsky-2-2-decoder/bs32+steps25.json +++ b/data/diffusion/text-to-image/H100 80GB HBM3/kandinsky-community/kandinsky-2-2-decoder/bs32+steps25.json @@ -1,8 +1,8 @@ { "Model": "kandinsky-community/kandinsky-2-2-decoder", "GPU": "NVIDIA H100 80GB HBM3", - "Energy/image (J)": 136.49577499999867, - "Batch latency (s)": 6.730837726593018, + "Energy/image (J)": 135.961328125, + "Batch latency (s)": 6.7253422975540165, "Batch size": 32, "Denoising steps": 25 } \ No newline at end of file diff --git a/data/diffusion/text-to-image/H100 80GB HBM3/kandinsky-community/kandinsky-2-2-decoder/bs4+steps25.json b/data/diffusion/text-to-image/H100 80GB HBM3/kandinsky-community/kandinsky-2-2-decoder/bs4+steps25.json index 3f3fb25..940447b 100644 --- a/data/diffusion/text-to-image/H100 80GB HBM3/kandinsky-community/kandinsky-2-2-decoder/bs4+steps25.json +++ b/data/diffusion/text-to-image/H100 80GB HBM3/kandinsky-community/kandinsky-2-2-decoder/bs4+steps25.json @@ -1,8 +1,8 @@ { "Model": "kandinsky-community/kandinsky-2-2-decoder", "GPU": "NVIDIA H100 80GB HBM3", - "Energy/image (J)": 177.44447500000243, - "Batch latency (s)": 1.5876455783843995, + "Energy/image (J)": 173.91712500001304, + "Batch latency (s)": 1.573417329788208, "Batch size": 4, "Denoising steps": 25 } \ No newline at end of file diff --git a/data/diffusion/text-to-image/H100 80GB HBM3/kandinsky-community/kandinsky-2-2-decoder/bs64+steps25.json b/data/diffusion/text-to-image/H100 80GB HBM3/kandinsky-community/kandinsky-2-2-decoder/bs64+steps25.json index 91ad556..d7e4c22 100644 --- a/data/diffusion/text-to-image/H100 80GB HBM3/kandinsky-community/kandinsky-2-2-decoder/bs64+steps25.json +++ b/data/diffusion/text-to-image/H100 80GB HBM3/kandinsky-community/kandinsky-2-2-decoder/bs64+steps25.json @@ -1,8 +1,8 @@ { "Model": "kandinsky-community/kandinsky-2-2-decoder", "GPU": "NVIDIA H100 80GB HBM3", - "Energy/image (J)": 131.1387625000003, - "Batch latency (s)": 12.868691635131835, + "Energy/image (J)": 130.85997968750016, + "Batch latency (s)": 12.837305545806885, "Batch size": 64, "Denoising steps": 25 } \ No newline at end of file diff --git a/data/diffusion/text-to-image/H100 80GB HBM3/kandinsky-community/kandinsky-2-2-decoder/bs8+steps25.json b/data/diffusion/text-to-image/H100 80GB HBM3/kandinsky-community/kandinsky-2-2-decoder/bs8+steps25.json index 1eef640..4ed393f 100644 --- a/data/diffusion/text-to-image/H100 80GB HBM3/kandinsky-community/kandinsky-2-2-decoder/bs8+steps25.json +++ b/data/diffusion/text-to-image/H100 80GB HBM3/kandinsky-community/kandinsky-2-2-decoder/bs8+steps25.json @@ -1,8 +1,8 @@ { "Model": "kandinsky-community/kandinsky-2-2-decoder", "GPU": "NVIDIA H100 80GB HBM3", - "Energy/image (J)": 155.2100000000035, - "Batch latency (s)": 2.180539679527283, + "Energy/image (J)": 155.19958750000222, + "Batch latency (s)": 2.1782283782958984, "Batch size": 8, "Denoising steps": 25 } \ No newline at end of file diff --git a/data/diffusion/text-to-image/H100 80GB HBM3/kandinsky-community/kandinsky-3/bs1+steps25.json b/data/diffusion/text-to-image/H100 80GB HBM3/kandinsky-community/kandinsky-3/bs1+steps25.json index 842085c..4b74487 100644 --- a/data/diffusion/text-to-image/H100 80GB HBM3/kandinsky-community/kandinsky-3/bs1+steps25.json +++ b/data/diffusion/text-to-image/H100 80GB HBM3/kandinsky-community/kandinsky-3/bs1+steps25.json @@ -1,8 +1,8 @@ { "Model": "kandinsky-community/kandinsky-3", "GPU": "NVIDIA H100 80GB HBM3", - "Energy/image (J)": 860.5385999999941, - "Batch latency (s)": 2.3725571155548097, + "Energy/image (J)": 848.3177000001073, + "Batch latency (s)": 2.351728391647339, "Batch size": 1, "Denoising steps": 25 } \ No newline at end of file diff --git a/data/diffusion/text-to-image/H100 80GB HBM3/kandinsky-community/kandinsky-3/bs2+steps25.json b/data/diffusion/text-to-image/H100 80GB HBM3/kandinsky-community/kandinsky-3/bs2+steps25.json index fe42200..a713379 100644 --- a/data/diffusion/text-to-image/H100 80GB HBM3/kandinsky-community/kandinsky-3/bs2+steps25.json +++ b/data/diffusion/text-to-image/H100 80GB HBM3/kandinsky-community/kandinsky-3/bs2+steps25.json @@ -1,8 +1,8 @@ { "Model": "kandinsky-community/kandinsky-3", "GPU": "NVIDIA H100 80GB HBM3", - "Energy/image (J)": 727.0428500000155, - "Batch latency (s)": 2.8992382049560548, + "Energy/image (J)": 716.0031000000424, + "Batch latency (s)": 2.8724076986312865, "Batch size": 2, "Denoising steps": 25 } \ No newline at end of file diff --git a/data/diffusion/text-to-image/H100 80GB HBM3/kandinsky-community/kandinsky-3/bs4+steps25.json b/data/diffusion/text-to-image/H100 80GB HBM3/kandinsky-community/kandinsky-3/bs4+steps25.json index 58835db..de76372 100644 --- a/data/diffusion/text-to-image/H100 80GB HBM3/kandinsky-community/kandinsky-3/bs4+steps25.json +++ b/data/diffusion/text-to-image/H100 80GB HBM3/kandinsky-community/kandinsky-3/bs4+steps25.json @@ -1,8 +1,8 @@ { "Model": "kandinsky-community/kandinsky-3", "GPU": "NVIDIA H100 80GB HBM3", - "Energy/image (J)": 677.2662250000052, - "Batch latency (s)": 4.657700920104981, + "Energy/image (J)": 682.126500000013, + "Batch latency (s)": 4.641835880279541, "Batch size": 4, "Denoising steps": 25 } \ No newline at end of file diff --git a/data/diffusion/text-to-image/H100 80GB HBM3/kandinsky-community/kandinsky-3/bs8+steps25.json b/data/diffusion/text-to-image/H100 80GB HBM3/kandinsky-community/kandinsky-3/bs8+steps25.json index f79ab3f..3fcf25b 100644 --- a/data/diffusion/text-to-image/H100 80GB HBM3/kandinsky-community/kandinsky-3/bs8+steps25.json +++ b/data/diffusion/text-to-image/H100 80GB HBM3/kandinsky-community/kandinsky-3/bs8+steps25.json @@ -1,8 +1,8 @@ { "Model": "kandinsky-community/kandinsky-3", "GPU": "NVIDIA H100 80GB HBM3", - "Energy/image (J)": 644.7600749999983, - "Batch latency (s)": 8.445084881782531, + "Energy/image (J)": 655.685175000003, + "Batch latency (s)": 8.430445384979247, "Batch size": 8, "Denoising steps": 25 } \ No newline at end of file diff --git a/data/diffusion/text-to-image/H100 80GB HBM3/prompthero/openjourney-v4/bs1+steps25.json b/data/diffusion/text-to-image/H100 80GB HBM3/prompthero/openjourney-v4/bs1+steps25.json index 86a5cf8..d4a8503 100644 --- a/data/diffusion/text-to-image/H100 80GB HBM3/prompthero/openjourney-v4/bs1+steps25.json +++ b/data/diffusion/text-to-image/H100 80GB HBM3/prompthero/openjourney-v4/bs1+steps25.json @@ -1,8 +1,8 @@ { "Model": "prompthero/openjourney-v4", "GPU": "NVIDIA H100 80GB HBM3", - "Energy/image (J)": 243.28739999998362, - "Batch latency (s)": 0.855378270149231, + "Energy/image (J)": 232.24340000003576, + "Batch latency (s)": 0.8665567636489868, "Batch size": 1, "Denoising steps": 25 } \ No newline at end of file diff --git a/data/diffusion/text-to-image/H100 80GB HBM3/prompthero/openjourney-v4/bs16+steps25.json b/data/diffusion/text-to-image/H100 80GB HBM3/prompthero/openjourney-v4/bs16+steps25.json index c7d9819..1a83837 100644 --- a/data/diffusion/text-to-image/H100 80GB HBM3/prompthero/openjourney-v4/bs16+steps25.json +++ b/data/diffusion/text-to-image/H100 80GB HBM3/prompthero/openjourney-v4/bs16+steps25.json @@ -1,8 +1,8 @@ { "Model": "prompthero/openjourney-v4", "GPU": "NVIDIA H100 80GB HBM3", - "Energy/image (J)": 136.89135625000054, - "Batch latency (s)": 3.2747750997543337, + "Energy/image (J)": 135.34282499999972, + "Batch latency (s)": 3.239760994911194, "Batch size": 16, "Denoising steps": 25 } \ No newline at end of file diff --git a/data/diffusion/text-to-image/H100 80GB HBM3/prompthero/openjourney-v4/bs2+steps25.json b/data/diffusion/text-to-image/H100 80GB HBM3/prompthero/openjourney-v4/bs2+steps25.json index 2532035..e97f5fa 100644 --- a/data/diffusion/text-to-image/H100 80GB HBM3/prompthero/openjourney-v4/bs2+steps25.json +++ b/data/diffusion/text-to-image/H100 80GB HBM3/prompthero/openjourney-v4/bs2+steps25.json @@ -1,8 +1,8 @@ { "Model": "prompthero/openjourney-v4", "GPU": "NVIDIA H100 80GB HBM3", - "Energy/image (J)": 182.5311500000069, - "Batch latency (s)": 0.9119171619415283, + "Energy/image (J)": 175.78234999999404, + "Batch latency (s)": 0.9105970144271851, "Batch size": 2, "Denoising steps": 25 } \ No newline at end of file diff --git a/data/diffusion/text-to-image/H100 80GB HBM3/prompthero/openjourney-v4/bs32+steps25.json b/data/diffusion/text-to-image/H100 80GB HBM3/prompthero/openjourney-v4/bs32+steps25.json index dbfd64f..af8e795 100644 --- a/data/diffusion/text-to-image/H100 80GB HBM3/prompthero/openjourney-v4/bs32+steps25.json +++ b/data/diffusion/text-to-image/H100 80GB HBM3/prompthero/openjourney-v4/bs32+steps25.json @@ -1,8 +1,8 @@ { "Model": "prompthero/openjourney-v4", "GPU": "NVIDIA H100 80GB HBM3", - "Energy/image (J)": 132.28641249999927, - "Batch latency (s)": 6.297622609138489, + "Energy/image (J)": 130.1285124999995, + "Batch latency (s)": 6.225514149665832, "Batch size": 32, "Denoising steps": 25 } \ No newline at end of file diff --git a/data/diffusion/text-to-image/H100 80GB HBM3/prompthero/openjourney-v4/bs4+steps25.json b/data/diffusion/text-to-image/H100 80GB HBM3/prompthero/openjourney-v4/bs4+steps25.json index 0622229..c3a581e 100644 --- a/data/diffusion/text-to-image/H100 80GB HBM3/prompthero/openjourney-v4/bs4+steps25.json +++ b/data/diffusion/text-to-image/H100 80GB HBM3/prompthero/openjourney-v4/bs4+steps25.json @@ -1,8 +1,8 @@ { "Model": "prompthero/openjourney-v4", "GPU": "NVIDIA H100 80GB HBM3", - "Energy/image (J)": 154.80212499999908, - "Batch latency (s)": 1.0733203649520875, + "Energy/image (J)": 148.74832499998155, + "Batch latency (s)": 1.1036246299743653, "Batch size": 4, "Denoising steps": 25 } \ No newline at end of file diff --git a/data/diffusion/text-to-image/H100 80GB HBM3/prompthero/openjourney-v4/bs64+steps25.json b/data/diffusion/text-to-image/H100 80GB HBM3/prompthero/openjourney-v4/bs64+steps25.json index 7e61eae..f9b0d73 100644 --- a/data/diffusion/text-to-image/H100 80GB HBM3/prompthero/openjourney-v4/bs64+steps25.json +++ b/data/diffusion/text-to-image/H100 80GB HBM3/prompthero/openjourney-v4/bs64+steps25.json @@ -1,8 +1,8 @@ { "Model": "prompthero/openjourney-v4", "GPU": "NVIDIA H100 80GB HBM3", - "Energy/image (J)": 128.81374062500035, - "Batch latency (s)": 12.13134765625, + "Energy/image (J)": 128.0899343750003, + "Batch latency (s)": 12.070884728431702, "Batch size": 64, "Denoising steps": 25 } \ No newline at end of file diff --git a/data/diffusion/text-to-image/H100 80GB HBM3/prompthero/openjourney-v4/bs8+steps25.json b/data/diffusion/text-to-image/H100 80GB HBM3/prompthero/openjourney-v4/bs8+steps25.json index b14b29f..4367e09 100644 --- a/data/diffusion/text-to-image/H100 80GB HBM3/prompthero/openjourney-v4/bs8+steps25.json +++ b/data/diffusion/text-to-image/H100 80GB HBM3/prompthero/openjourney-v4/bs8+steps25.json @@ -1,8 +1,8 @@ { "Model": "prompthero/openjourney-v4", "GPU": "NVIDIA H100 80GB HBM3", - "Energy/image (J)": 143.28031249999768, - "Batch latency (s)": 1.7443701505661011, + "Energy/image (J)": 139.91437499999302, + "Batch latency (s)": 1.74277982711792, "Batch size": 8, "Denoising steps": 25 } \ No newline at end of file diff --git a/data/diffusion/text-to-image/H100 80GB HBM3/segmind/SSD-1B/bs1+steps25.json b/data/diffusion/text-to-image/H100 80GB HBM3/segmind/SSD-1B/bs1+steps25.json index 7e63d7f..613b1af 100644 --- a/data/diffusion/text-to-image/H100 80GB HBM3/segmind/SSD-1B/bs1+steps25.json +++ b/data/diffusion/text-to-image/H100 80GB HBM3/segmind/SSD-1B/bs1+steps25.json @@ -1,8 +1,8 @@ { "Model": "segmind/SSD-1B", "GPU": "NVIDIA H100 80GB HBM3", - "Energy/image (J)": 625.1895999999717, - "Batch latency (s)": 1.38781898021698, + "Energy/image (J)": 666.479899999965, + "Batch latency (s)": 1.3885040760040284, "Batch size": 1, "Denoising steps": 25 } \ No newline at end of file diff --git a/data/diffusion/text-to-image/H100 80GB HBM3/segmind/SSD-1B/bs16+steps25.json b/data/diffusion/text-to-image/H100 80GB HBM3/segmind/SSD-1B/bs16+steps25.json index cb6a2bb..c12deb7 100644 --- a/data/diffusion/text-to-image/H100 80GB HBM3/segmind/SSD-1B/bs16+steps25.json +++ b/data/diffusion/text-to-image/H100 80GB HBM3/segmind/SSD-1B/bs16+steps25.json @@ -1,8 +1,8 @@ { "Model": "segmind/SSD-1B", "GPU": "NVIDIA H100 80GB HBM3", - "Energy/image (J)": 497.0412249999994, - "Batch latency (s)": 11.863849401473999, + "Energy/image (J)": 513.199212499999, + "Batch latency (s)": 12.176180934906006, "Batch size": 16, "Denoising steps": 25 } \ No newline at end of file diff --git a/data/diffusion/text-to-image/H100 80GB HBM3/segmind/SSD-1B/bs2+steps25.json b/data/diffusion/text-to-image/H100 80GB HBM3/segmind/SSD-1B/bs2+steps25.json index 28e6380..91e675a 100644 --- a/data/diffusion/text-to-image/H100 80GB HBM3/segmind/SSD-1B/bs2+steps25.json +++ b/data/diffusion/text-to-image/H100 80GB HBM3/segmind/SSD-1B/bs2+steps25.json @@ -1,8 +1,8 @@ { "Model": "segmind/SSD-1B", "GPU": "NVIDIA H100 80GB HBM3", - "Energy/image (J)": 569.9780000000028, - "Batch latency (s)": 1.8244082450866699, + "Energy/image (J)": 592.11455000001, + "Batch latency (s)": 1.8233376026153565, "Batch size": 2, "Denoising steps": 25 } \ No newline at end of file diff --git a/data/diffusion/text-to-image/H100 80GB HBM3/segmind/SSD-1B/bs4+steps25.json b/data/diffusion/text-to-image/H100 80GB HBM3/segmind/SSD-1B/bs4+steps25.json index cae2262..f44e8b9 100644 --- a/data/diffusion/text-to-image/H100 80GB HBM3/segmind/SSD-1B/bs4+steps25.json +++ b/data/diffusion/text-to-image/H100 80GB HBM3/segmind/SSD-1B/bs4+steps25.json @@ -1,8 +1,8 @@ { "Model": "segmind/SSD-1B", "GPU": "NVIDIA H100 80GB HBM3", - "Energy/image (J)": 540.4337500000023, - "Batch latency (s)": 3.2643563508987428, + "Energy/image (J)": 544.8429999999935, + "Batch latency (s)": 3.286959099769592, "Batch size": 4, "Denoising steps": 25 } \ No newline at end of file diff --git a/data/diffusion/text-to-image/H100 80GB HBM3/segmind/SSD-1B/bs8+steps25.json b/data/diffusion/text-to-image/H100 80GB HBM3/segmind/SSD-1B/bs8+steps25.json index 033e90a..8bb536f 100644 --- a/data/diffusion/text-to-image/H100 80GB HBM3/segmind/SSD-1B/bs8+steps25.json +++ b/data/diffusion/text-to-image/H100 80GB HBM3/segmind/SSD-1B/bs8+steps25.json @@ -1,8 +1,8 @@ { "Model": "segmind/SSD-1B", "GPU": "NVIDIA H100 80GB HBM3", - "Energy/image (J)": 509.72669999999925, - "Batch latency (s)": 6.086679577827454, + "Energy/image (J)": 522.7006874999963, + "Batch latency (s)": 6.223434543609619, "Batch size": 8, "Denoising steps": 25 } \ No newline at end of file diff --git a/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/sdxl-turbo/bs1+steps25.json b/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/sdxl-turbo/bs1+steps25.json index 2762b1a..59f162f 100644 --- a/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/sdxl-turbo/bs1+steps25.json +++ b/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/sdxl-turbo/bs1+steps25.json @@ -1,8 +1,8 @@ { "Model": "stabilityai/sdxl-turbo", "GPU": "NVIDIA H100 80GB HBM3", - "Energy/image (J)": 432.39010000000707, - "Batch latency (s)": 2.015624976158142, + "Energy/image (J)": 419.5822999998927, + "Batch latency (s)": 1.9746390342712403, "Batch size": 1, "Denoising steps": 25 } \ No newline at end of file diff --git a/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/sdxl-turbo/bs16+steps25.json b/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/sdxl-turbo/bs16+steps25.json index 0ced434..167a8e9 100644 --- a/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/sdxl-turbo/bs16+steps25.json +++ b/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/sdxl-turbo/bs16+steps25.json @@ -1,8 +1,8 @@ { "Model": "stabilityai/sdxl-turbo", "GPU": "NVIDIA H100 80GB HBM3", - "Energy/image (J)": 180.98546874999883, - "Batch latency (s)": 4.272360563278198, + "Energy/image (J)": 179.42289374999237, + "Batch latency (s)": 4.241718673706055, "Batch size": 16, "Denoising steps": 25 } \ No newline at end of file diff --git a/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/sdxl-turbo/bs2+steps25.json b/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/sdxl-turbo/bs2+steps25.json index ecd7094..015bae5 100644 --- a/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/sdxl-turbo/bs2+steps25.json +++ b/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/sdxl-turbo/bs2+steps25.json @@ -1,8 +1,8 @@ { "Model": "stabilityai/sdxl-turbo", "GPU": "NVIDIA H100 80GB HBM3", - "Energy/image (J)": 305.55374999998605, - "Batch latency (s)": 2.05529043674469, + "Energy/image (J)": 295.792149999924, + "Batch latency (s)": 2.0774401664733886, "Batch size": 2, "Denoising steps": 25 } \ No newline at end of file diff --git a/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/sdxl-turbo/bs32+steps25.json b/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/sdxl-turbo/bs32+steps25.json index 9aaf269..611686f 100644 --- a/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/sdxl-turbo/bs32+steps25.json +++ b/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/sdxl-turbo/bs32+steps25.json @@ -1,8 +1,8 @@ { "Model": "stabilityai/sdxl-turbo", "GPU": "NVIDIA H100 80GB HBM3", - "Energy/image (J)": 173.50129687500012, - "Batch latency (s)": 8.135975241661072, + "Energy/image (J)": 174.0245281249983, + "Batch latency (s)": 8.14413080215454, "Batch size": 32, "Denoising steps": 25 } \ No newline at end of file diff --git a/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/sdxl-turbo/bs4+steps25.json b/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/sdxl-turbo/bs4+steps25.json index 5171163..13bb08e 100644 --- a/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/sdxl-turbo/bs4+steps25.json +++ b/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/sdxl-turbo/bs4+steps25.json @@ -1,8 +1,8 @@ { "Model": "stabilityai/sdxl-turbo", "GPU": "NVIDIA H100 80GB HBM3", - "Energy/image (J)": 231.15972499999916, - "Batch latency (s)": 2.187738275527954, + "Energy/image (J)": 230.345924999984, + "Batch latency (s)": 2.223876476287842, "Batch size": 4, "Denoising steps": 25 } \ No newline at end of file diff --git a/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/sdxl-turbo/bs64+steps25.json b/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/sdxl-turbo/bs64+steps25.json index de41796..c9be182 100644 --- a/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/sdxl-turbo/bs64+steps25.json +++ b/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/sdxl-turbo/bs64+steps25.json @@ -1,8 +1,8 @@ { "Model": "stabilityai/sdxl-turbo", "GPU": "NVIDIA H100 80GB HBM3", - "Energy/image (J)": 167.10275156249992, - "Batch latency (s)": 15.62219078540802, + "Energy/image (J)": 166.73651874999922, + "Batch latency (s)": 15.59785532951355, "Batch size": 64, "Denoising steps": 25 } \ No newline at end of file diff --git a/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/sdxl-turbo/bs8+steps25.json b/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/sdxl-turbo/bs8+steps25.json index a674ed4..e7cd7ed 100644 --- a/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/sdxl-turbo/bs8+steps25.json +++ b/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/sdxl-turbo/bs8+steps25.json @@ -1,8 +1,8 @@ { "Model": "stabilityai/sdxl-turbo", "GPU": "NVIDIA H100 80GB HBM3", - "Energy/image (J)": 201.1591625000001, - "Batch latency (s)": 2.4453672647476195, + "Energy/image (J)": 198.7015374999959, + "Batch latency (s)": 2.459192657470703, "Batch size": 8, "Denoising steps": 25 } \ No newline at end of file diff --git a/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/stable-diffusion-2-1/bs1+steps25.json b/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/stable-diffusion-2-1/bs1+steps25.json index d54e941..d81b612 100644 --- a/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/stable-diffusion-2-1/bs1+steps25.json +++ b/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/stable-diffusion-2-1/bs1+steps25.json @@ -1,8 +1,8 @@ { "Model": "stabilityai/stable-diffusion-2-1", "GPU": "NVIDIA H100 80GB HBM3", - "Energy/image (J)": 402.25479999999516, - "Batch latency (s)": 0.9360565900802612, + "Energy/image (J)": 386.4609999999404, + "Batch latency (s)": 0.9410791873931885, "Batch size": 1, "Denoising steps": 25 } \ No newline at end of file diff --git a/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/stable-diffusion-2-1/bs16+steps25.json b/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/stable-diffusion-2-1/bs16+steps25.json index 9e9cb85..23e014d 100644 --- a/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/stable-diffusion-2-1/bs16+steps25.json +++ b/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/stable-diffusion-2-1/bs16+steps25.json @@ -1,8 +1,8 @@ { "Model": "stabilityai/stable-diffusion-2-1", "GPU": "NVIDIA H100 80GB HBM3", - "Energy/image (J)": 295.6397187499999, - "Batch latency (s)": 7.022916412353515, + "Energy/image (J)": 295.0764937500004, + "Batch latency (s)": 7.02507450580597, "Batch size": 16, "Denoising steps": 25 } \ No newline at end of file diff --git a/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/stable-diffusion-2-1/bs2+steps25.json b/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/stable-diffusion-2-1/bs2+steps25.json index 0742ce9..ac4f1d9 100644 --- a/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/stable-diffusion-2-1/bs2+steps25.json +++ b/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/stable-diffusion-2-1/bs2+steps25.json @@ -1,8 +1,8 @@ { "Model": "stabilityai/stable-diffusion-2-1", "GPU": "NVIDIA H100 80GB HBM3", - "Energy/image (J)": 347.869849999994, - "Batch latency (s)": 1.1735167980194092, + "Energy/image (J)": 341.5639000000432, + "Batch latency (s)": 1.1783596992492675, "Batch size": 2, "Denoising steps": 25 } \ No newline at end of file diff --git a/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/stable-diffusion-2-1/bs32+steps25.json b/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/stable-diffusion-2-1/bs32+steps25.json index c649054..9745565 100644 --- a/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/stable-diffusion-2-1/bs32+steps25.json +++ b/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/stable-diffusion-2-1/bs32+steps25.json @@ -1,8 +1,8 @@ { "Model": "stabilityai/stable-diffusion-2-1", "GPU": "NVIDIA H100 80GB HBM3", - "Energy/image (J)": 288.93712499999964, - "Batch latency (s)": 13.71097764968872, + "Energy/image (J)": 289.42614687500173, + "Batch latency (s)": 13.744895315170288, "Batch size": 32, "Denoising steps": 25 } \ No newline at end of file diff --git a/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/stable-diffusion-2-1/bs4+steps25.json b/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/stable-diffusion-2-1/bs4+steps25.json index 4863ae4..fa741a3 100644 --- a/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/stable-diffusion-2-1/bs4+steps25.json +++ b/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/stable-diffusion-2-1/bs4+steps25.json @@ -1,8 +1,8 @@ { "Model": "stabilityai/stable-diffusion-2-1", "GPU": "NVIDIA H100 80GB HBM3", - "Energy/image (J)": 328.95322500000475, - "Batch latency (s)": 1.9817272901535035, + "Energy/image (J)": 323.79292500000446, + "Batch latency (s)": 1.9873192310333252, "Batch size": 4, "Denoising steps": 25 } \ No newline at end of file diff --git a/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/stable-diffusion-2-1/bs64+steps25.json b/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/stable-diffusion-2-1/bs64+steps25.json index e3fa33d..1484244 100644 --- a/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/stable-diffusion-2-1/bs64+steps25.json +++ b/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/stable-diffusion-2-1/bs64+steps25.json @@ -1,8 +1,8 @@ { "Model": "stabilityai/stable-diffusion-2-1", "GPU": "NVIDIA H100 80GB HBM3", - "Energy/image (J)": 279.46398281250003, - "Batch latency (s)": 26.251275372505187, + "Energy/image (J)": 279.7018828125001, + "Batch latency (s)": 26.282402443885804, "Batch size": 64, "Denoising steps": 25 } \ No newline at end of file diff --git a/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/stable-diffusion-2-1/bs8+steps25.json b/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/stable-diffusion-2-1/bs8+steps25.json index 36b3361..5c4eac4 100644 --- a/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/stable-diffusion-2-1/bs8+steps25.json +++ b/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/stable-diffusion-2-1/bs8+steps25.json @@ -1,8 +1,8 @@ { "Model": "stabilityai/stable-diffusion-2-1", "GPU": "NVIDIA H100 80GB HBM3", - "Energy/image (J)": 307.12399999999906, - "Batch latency (s)": 3.64911527633667, + "Energy/image (J)": 305.03673749999143, + "Batch latency (s)": 3.63439359664917, "Batch size": 8, "Denoising steps": 25 } \ No newline at end of file diff --git a/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/stable-diffusion-3-medium-diffusers/bs1+steps25.json b/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/stable-diffusion-3-medium-diffusers/bs1+steps25.json index 93b2555..b1aaa27 100644 --- a/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/stable-diffusion-3-medium-diffusers/bs1+steps25.json +++ b/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/stable-diffusion-3-medium-diffusers/bs1+steps25.json @@ -1,8 +1,8 @@ { "Model": "stabilityai/stable-diffusion-3-medium-diffusers", "GPU": "NVIDIA H100 80GB HBM3", - "Energy/image (J)": 1219.5621000000276, - "Batch latency (s)": 1.8901970863342286, + "Energy/image (J)": 1256.9625999998302, + "Batch latency (s)": 1.9030212879180908, "Batch size": 1, "Denoising steps": 25 } \ No newline at end of file diff --git a/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/stable-diffusion-3-medium-diffusers/bs16+steps25.json b/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/stable-diffusion-3-medium-diffusers/bs16+steps25.json index a52245f..c7defb0 100644 --- a/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/stable-diffusion-3-medium-diffusers/bs16+steps25.json +++ b/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/stable-diffusion-3-medium-diffusers/bs16+steps25.json @@ -1,8 +1,8 @@ { "Model": "stabilityai/stable-diffusion-3-medium-diffusers", "GPU": "NVIDIA H100 80GB HBM3", - "Energy/image (J)": 1101.8141374999977, - "Batch latency (s)": 26.064258456230164, + "Energy/image (J)": 1115.0813562500057, + "Batch latency (s)": 26.295916223526, "Batch size": 16, "Denoising steps": 25 } \ No newline at end of file diff --git a/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/stable-diffusion-3-medium-diffusers/bs2+steps25.json b/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/stable-diffusion-3-medium-diffusers/bs2+steps25.json index b6763f8..799d595 100644 --- a/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/stable-diffusion-3-medium-diffusers/bs2+steps25.json +++ b/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/stable-diffusion-3-medium-diffusers/bs2+steps25.json @@ -1,8 +1,8 @@ { "Model": "stabilityai/stable-diffusion-3-medium-diffusers", "GPU": "NVIDIA H100 80GB HBM3", - "Energy/image (J)": 1164.132700000005, - "Batch latency (s)": 3.4976581573486327, + "Energy/image (J)": 1187.2511500000953, + "Batch latency (s)": 3.544024109840393, "Batch size": 2, "Denoising steps": 25 } \ No newline at end of file diff --git a/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/stable-diffusion-3-medium-diffusers/bs32+steps25.json b/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/stable-diffusion-3-medium-diffusers/bs32+steps25.json index 679ff0e..8d3afbf 100644 --- a/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/stable-diffusion-3-medium-diffusers/bs32+steps25.json +++ b/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/stable-diffusion-3-medium-diffusers/bs32+steps25.json @@ -1,8 +1,8 @@ { "Model": "stabilityai/stable-diffusion-3-medium-diffusers", "GPU": "NVIDIA H100 80GB HBM3", - "Energy/image (J)": 1103.1854343750006, - "Batch latency (s)": 52.5412620306015, + "Energy/image (J)": 1111.7095656249962, + "Batch latency (s)": 52.63584921360016, "Batch size": 32, "Denoising steps": 25 } \ No newline at end of file diff --git a/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/stable-diffusion-3-medium-diffusers/bs4+steps25.json b/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/stable-diffusion-3-medium-diffusers/bs4+steps25.json index 082b34b..3f0838f 100644 --- a/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/stable-diffusion-3-medium-diffusers/bs4+steps25.json +++ b/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/stable-diffusion-3-medium-diffusers/bs4+steps25.json @@ -1,8 +1,8 @@ { "Model": "stabilityai/stable-diffusion-3-medium-diffusers", "GPU": "NVIDIA H100 80GB HBM3", - "Energy/image (J)": 1124.7332500000018, - "Batch latency (s)": 6.681292104721069, + "Energy/image (J)": 1141.1115500000305, + "Batch latency (s)": 6.750077819824218, "Batch size": 4, "Denoising steps": 25 } \ No newline at end of file diff --git a/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/stable-diffusion-3-medium-diffusers/bs8+steps25.json b/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/stable-diffusion-3-medium-diffusers/bs8+steps25.json index 1adfbb1..6053567 100644 --- a/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/stable-diffusion-3-medium-diffusers/bs8+steps25.json +++ b/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/stable-diffusion-3-medium-diffusers/bs8+steps25.json @@ -1,8 +1,8 @@ { "Model": "stabilityai/stable-diffusion-3-medium-diffusers", "GPU": "NVIDIA H100 80GB HBM3", - "Energy/image (J)": 1105.709275000001, - "Batch latency (s)": 13.087377643585205, + "Energy/image (J)": 1124.4100500000175, + "Batch latency (s)": 13.255334210395812, "Batch size": 8, "Denoising steps": 25 } \ No newline at end of file diff --git a/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/stable-diffusion-xl-base-1.0/bs1+steps25.json b/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/stable-diffusion-xl-base-1.0/bs1+steps25.json index d15517a..52dd447 100644 --- a/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/stable-diffusion-xl-base-1.0/bs1+steps25.json +++ b/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/stable-diffusion-xl-base-1.0/bs1+steps25.json @@ -1,8 +1,8 @@ { "Model": "stabilityai/stable-diffusion-xl-base-1.0", "GPU": "NVIDIA H100 80GB HBM3", - "Energy/image (J)": 973.6592999999878, - "Batch latency (s)": 2.2974732398986815, + "Energy/image (J)": 969.3598000001163, + "Batch latency (s)": 2.2578482627868652, "Batch size": 1, "Denoising steps": 25 } \ No newline at end of file diff --git a/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/stable-diffusion-xl-base-1.0/bs16+steps25.json b/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/stable-diffusion-xl-base-1.0/bs16+steps25.json index f5e9a3d..ac6bde9 100644 --- a/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/stable-diffusion-xl-base-1.0/bs16+steps25.json +++ b/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/stable-diffusion-xl-base-1.0/bs16+steps25.json @@ -1,8 +1,8 @@ { "Model": "stabilityai/stable-diffusion-xl-base-1.0", "GPU": "NVIDIA H100 80GB HBM3", - "Energy/image (J)": 733.4240562499996, - "Batch latency (s)": 17.358140754699708, + "Energy/image (J)": 737.6392125000013, + "Batch latency (s)": 17.373131418228148, "Batch size": 16, "Denoising steps": 25 } \ No newline at end of file diff --git a/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/stable-diffusion-xl-base-1.0/bs2+steps25.json b/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/stable-diffusion-xl-base-1.0/bs2+steps25.json index 38dadd4..3d349ec 100644 --- a/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/stable-diffusion-xl-base-1.0/bs2+steps25.json +++ b/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/stable-diffusion-xl-base-1.0/bs2+steps25.json @@ -1,8 +1,8 @@ { "Model": "stabilityai/stable-diffusion-xl-base-1.0", "GPU": "NVIDIA H100 80GB HBM3", - "Energy/image (J)": 848.840699999989, - "Batch latency (s)": 2.6611390113830566, + "Energy/image (J)": 868.7413499999791, + "Batch latency (s)": 2.683417248725891, "Batch size": 2, "Denoising steps": 25 } \ No newline at end of file diff --git a/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/stable-diffusion-xl-base-1.0/bs4+steps25.json b/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/stable-diffusion-xl-base-1.0/bs4+steps25.json index 0491a96..a63a91d 100644 --- a/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/stable-diffusion-xl-base-1.0/bs4+steps25.json +++ b/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/stable-diffusion-xl-base-1.0/bs4+steps25.json @@ -1,8 +1,8 @@ { "Model": "stabilityai/stable-diffusion-xl-base-1.0", "GPU": "NVIDIA H100 80GB HBM3", - "Energy/image (J)": 789.0821250000038, - "Batch latency (s)": 4.744464302062989, + "Energy/image (J)": 794.8234249999747, + "Batch latency (s)": 4.7213153600692745, "Batch size": 4, "Denoising steps": 25 } \ No newline at end of file diff --git a/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/stable-diffusion-xl-base-1.0/bs8+steps25.json b/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/stable-diffusion-xl-base-1.0/bs8+steps25.json index 0568d56..979aa4b 100644 --- a/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/stable-diffusion-xl-base-1.0/bs8+steps25.json +++ b/data/diffusion/text-to-image/H100 80GB HBM3/stabilityai/stable-diffusion-xl-base-1.0/bs8+steps25.json @@ -1,8 +1,8 @@ { "Model": "stabilityai/stable-diffusion-xl-base-1.0", "GPU": "NVIDIA H100 80GB HBM3", - "Energy/image (J)": 752.1045125000004, - "Batch latency (s)": 8.926730370521545, + "Energy/image (J)": 756.267812499986, + "Batch latency (s)": 8.936180830001831, "Batch size": 8, "Denoising steps": 25 } \ No newline at end of file diff --git a/data/diffusion/text-to-video/A100-SXM4-40GB/ali-vilab/text-to-video-ms-1.7b/bs1+steps25+frames16.json b/data/diffusion/text-to-video/A100-SXM4-40GB/ali-vilab/text-to-video-ms-1.7b/bs1+steps25+frames16.json index 16902b5..e3da32a 100644 --- a/data/diffusion/text-to-video/A100-SXM4-40GB/ali-vilab/text-to-video-ms-1.7b/bs1+steps25+frames16.json +++ b/data/diffusion/text-to-video/A100-SXM4-40GB/ali-vilab/text-to-video-ms-1.7b/bs1+steps25+frames16.json @@ -1,8 +1,8 @@ { "Model": "ali-vilab/text-to-video-ms-1.7b", "GPU": "NVIDIA A100-SXM4-40GB", - "Energy/video (J)": 1168.710700000031, - "Batch latency (s)": 3.279584217071533, + "Energy/video (J)": 1153.5816999999806, + "Batch latency (s)": 3.23746497631073, "Batch size": 1, "Denoising steps": 25, "Frames": 16 diff --git a/data/diffusion/text-to-video/A100-SXM4-40GB/ali-vilab/text-to-video-ms-1.7b/bs16+steps25+frames16.json b/data/diffusion/text-to-video/A100-SXM4-40GB/ali-vilab/text-to-video-ms-1.7b/bs16+steps25+frames16.json new file mode 100644 index 0000000..fdfcf2c --- /dev/null +++ b/data/diffusion/text-to-video/A100-SXM4-40GB/ali-vilab/text-to-video-ms-1.7b/bs16+steps25+frames16.json @@ -0,0 +1,9 @@ +{ + "Model": "ali-vilab/text-to-video-ms-1.7b", + "GPU": "NVIDIA A100-SXM4-40GB", + "Energy/video (J)": 1002.3830562499992, + "Batch latency (s)": 42.21276063919068, + "Batch size": 16, + "Denoising steps": 25, + "Frames": 16 +} \ No newline at end of file diff --git a/data/diffusion/text-to-video/A100-SXM4-40GB/ali-vilab/text-to-video-ms-1.7b/bs2+steps25+frames16.json b/data/diffusion/text-to-video/A100-SXM4-40GB/ali-vilab/text-to-video-ms-1.7b/bs2+steps25+frames16.json index 2d5a958..5e15332 100644 --- a/data/diffusion/text-to-video/A100-SXM4-40GB/ali-vilab/text-to-video-ms-1.7b/bs2+steps25+frames16.json +++ b/data/diffusion/text-to-video/A100-SXM4-40GB/ali-vilab/text-to-video-ms-1.7b/bs2+steps25+frames16.json @@ -1,8 +1,8 @@ { "Model": "ali-vilab/text-to-video-ms-1.7b", "GPU": "NVIDIA A100-SXM4-40GB", - "Energy/video (J)": 1083.7407000000123, - "Batch latency (s)": 5.75505154132843, + "Energy/video (J)": 1088.2321500000078, + "Batch latency (s)": 5.810182595252991, "Batch size": 2, "Denoising steps": 25, "Frames": 16 diff --git a/data/diffusion/text-to-video/A100-SXM4-40GB/ali-vilab/text-to-video-ms-1.7b/bs4+steps25+frames16.json b/data/diffusion/text-to-video/A100-SXM4-40GB/ali-vilab/text-to-video-ms-1.7b/bs4+steps25+frames16.json index 07e9c28..a4636e2 100644 --- a/data/diffusion/text-to-video/A100-SXM4-40GB/ali-vilab/text-to-video-ms-1.7b/bs4+steps25+frames16.json +++ b/data/diffusion/text-to-video/A100-SXM4-40GB/ali-vilab/text-to-video-ms-1.7b/bs4+steps25+frames16.json @@ -1,8 +1,8 @@ { "Model": "ali-vilab/text-to-video-ms-1.7b", "GPU": "NVIDIA A100-SXM4-40GB", - "Energy/video (J)": 1031.8761250000098, - "Batch latency (s)": 10.693570613861084, + "Energy/video (J)": 1039.8423750000075, + "Batch latency (s)": 10.828980302810669, "Batch size": 4, "Denoising steps": 25, "Frames": 16 diff --git a/data/diffusion/text-to-video/A100-SXM4-40GB/ali-vilab/text-to-video-ms-1.7b/bs8+steps25+frames16.json b/data/diffusion/text-to-video/A100-SXM4-40GB/ali-vilab/text-to-video-ms-1.7b/bs8+steps25+frames16.json index 9b941b9..175bccd 100644 --- a/data/diffusion/text-to-video/A100-SXM4-40GB/ali-vilab/text-to-video-ms-1.7b/bs8+steps25+frames16.json +++ b/data/diffusion/text-to-video/A100-SXM4-40GB/ali-vilab/text-to-video-ms-1.7b/bs8+steps25+frames16.json @@ -1,8 +1,8 @@ { "Model": "ali-vilab/text-to-video-ms-1.7b", "GPU": "NVIDIA A100-SXM4-40GB", - "Energy/video (J)": 995.5903875000018, - "Batch latency (s)": 20.66424689292908, + "Energy/video (J)": 1001.8907250000047, + "Batch latency (s)": 20.911450886726378, "Batch size": 8, "Denoising steps": 25, "Frames": 16 diff --git a/data/diffusion/text-to-video/A100-SXM4-40GB/guoyww/animatediff-motion-adapter-v1-5-3/bs1+steps25+frames16.json b/data/diffusion/text-to-video/A100-SXM4-40GB/guoyww/animatediff-motion-adapter-v1-5-3/bs1+steps25+frames16.json index d608d57..0025f2c 100644 --- a/data/diffusion/text-to-video/A100-SXM4-40GB/guoyww/animatediff-motion-adapter-v1-5-3/bs1+steps25+frames16.json +++ b/data/diffusion/text-to-video/A100-SXM4-40GB/guoyww/animatediff-motion-adapter-v1-5-3/bs1+steps25+frames16.json @@ -1,8 +1,8 @@ { "Model": "guoyww/animatediff-motion-adapter-v1-5-3", "GPU": "NVIDIA A100-SXM4-40GB", - "Energy/video (J)": 3613.7171999999787, - "Batch latency (s)": 9.765414237976074, + "Energy/video (J)": 3808.3875, + "Batch latency (s)": 9.97215178012848, "Batch size": 1, "Denoising steps": 25, "Frames": 16 diff --git a/data/diffusion/text-to-video/A100-SXM4-40GB/guoyww/animatediff-motion-adapter-v1-5-3/bs2+steps25+frames16.json b/data/diffusion/text-to-video/A100-SXM4-40GB/guoyww/animatediff-motion-adapter-v1-5-3/bs2+steps25+frames16.json index fb8c9ad..b196f26 100644 --- a/data/diffusion/text-to-video/A100-SXM4-40GB/guoyww/animatediff-motion-adapter-v1-5-3/bs2+steps25+frames16.json +++ b/data/diffusion/text-to-video/A100-SXM4-40GB/guoyww/animatediff-motion-adapter-v1-5-3/bs2+steps25+frames16.json @@ -1,8 +1,8 @@ { "Model": "guoyww/animatediff-motion-adapter-v1-5-3", "GPU": "NVIDIA A100-SXM4-40GB", - "Energy/video (J)": 3613.7226499999874, - "Batch latency (s)": 19.047373509407045, + "Energy/video (J)": 3714.9077000000048, + "Batch latency (s)": 19.319639086723328, "Batch size": 2, "Denoising steps": 25, "Frames": 16 diff --git a/data/diffusion/text-to-video/A100-SXM4-40GB/guoyww/animatediff-motion-adapter-v1-5-3/bs4+steps25+frames16.json b/data/diffusion/text-to-video/A100-SXM4-40GB/guoyww/animatediff-motion-adapter-v1-5-3/bs4+steps25+frames16.json index fe48e4b..5ed74a0 100644 --- a/data/diffusion/text-to-video/A100-SXM4-40GB/guoyww/animatediff-motion-adapter-v1-5-3/bs4+steps25+frames16.json +++ b/data/diffusion/text-to-video/A100-SXM4-40GB/guoyww/animatediff-motion-adapter-v1-5-3/bs4+steps25+frames16.json @@ -1,8 +1,8 @@ { "Model": "guoyww/animatediff-motion-adapter-v1-5-3", "GPU": "NVIDIA A100-SXM4-40GB", - "Energy/video (J)": 3579.347100000002, - "Batch latency (s)": 38.19397940635681, + "Energy/video (J)": 3723.060124999983, + "Batch latency (s)": 38.925279235839845, "Batch size": 4, "Denoising steps": 25, "Frames": 16 diff --git a/data/diffusion/text-to-video/H100 80GB HBM3/ali-vilab/text-to-video-ms-1.7b/bs1+steps25+frames16.json b/data/diffusion/text-to-video/H100 80GB HBM3/ali-vilab/text-to-video-ms-1.7b/bs1+steps25+frames16.json index a3cb4e1..9ca43d3 100644 --- a/data/diffusion/text-to-video/H100 80GB HBM3/ali-vilab/text-to-video-ms-1.7b/bs1+steps25+frames16.json +++ b/data/diffusion/text-to-video/H100 80GB HBM3/ali-vilab/text-to-video-ms-1.7b/bs1+steps25+frames16.json @@ -1,8 +1,8 @@ { "Model": "ali-vilab/text-to-video-ms-1.7b", "GPU": "NVIDIA H100 80GB HBM3", - "Energy/video (J)": 972.1845000000205, - "Batch latency (s)": 1.972856879234314, + "Energy/video (J)": 956.181299999915, + "Batch latency (s)": 1.9992478847503663, "Batch size": 1, "Denoising steps": 25, "Frames": 16 diff --git a/data/diffusion/text-to-video/H100 80GB HBM3/ali-vilab/text-to-video-ms-1.7b/bs16+steps25+frames16.json b/data/diffusion/text-to-video/H100 80GB HBM3/ali-vilab/text-to-video-ms-1.7b/bs16+steps25+frames16.json index 9dc988d..a564b92 100644 --- a/data/diffusion/text-to-video/H100 80GB HBM3/ali-vilab/text-to-video-ms-1.7b/bs16+steps25+frames16.json +++ b/data/diffusion/text-to-video/H100 80GB HBM3/ali-vilab/text-to-video-ms-1.7b/bs16+steps25+frames16.json @@ -1,8 +1,8 @@ { "Model": "ali-vilab/text-to-video-ms-1.7b", "GPU": "NVIDIA H100 80GB HBM3", - "Energy/video (J)": 816.0705249999999, - "Batch latency (s)": 19.527364635467528, + "Energy/video (J)": 812.8134187500109, + "Batch latency (s)": 19.53088092803955, "Batch size": 16, "Denoising steps": 25, "Frames": 16 diff --git a/data/diffusion/text-to-video/H100 80GB HBM3/ali-vilab/text-to-video-ms-1.7b/bs2+steps25+frames16.json b/data/diffusion/text-to-video/H100 80GB HBM3/ali-vilab/text-to-video-ms-1.7b/bs2+steps25+frames16.json index 614a330..07543b3 100644 --- a/data/diffusion/text-to-video/H100 80GB HBM3/ali-vilab/text-to-video-ms-1.7b/bs2+steps25+frames16.json +++ b/data/diffusion/text-to-video/H100 80GB HBM3/ali-vilab/text-to-video-ms-1.7b/bs2+steps25+frames16.json @@ -1,8 +1,8 @@ { "Model": "ali-vilab/text-to-video-ms-1.7b", "GPU": "NVIDIA H100 80GB HBM3", - "Energy/video (J)": 889.6341500000096, - "Batch latency (s)": 3.0206708192825316, + "Energy/video (J)": 894.7264999999664, + "Batch latency (s)": 3.0091302156448365, "Batch size": 2, "Denoising steps": 25, "Frames": 16 diff --git a/data/diffusion/text-to-video/H100 80GB HBM3/ali-vilab/text-to-video-ms-1.7b/bs4+steps25+frames16.json b/data/diffusion/text-to-video/H100 80GB HBM3/ali-vilab/text-to-video-ms-1.7b/bs4+steps25+frames16.json index 7e4cdb5..38848c7 100644 --- a/data/diffusion/text-to-video/H100 80GB HBM3/ali-vilab/text-to-video-ms-1.7b/bs4+steps25+frames16.json +++ b/data/diffusion/text-to-video/H100 80GB HBM3/ali-vilab/text-to-video-ms-1.7b/bs4+steps25+frames16.json @@ -1,8 +1,8 @@ { "Model": "ali-vilab/text-to-video-ms-1.7b", "GPU": "NVIDIA H100 80GB HBM3", - "Energy/video (J)": 864.8537000000011, - "Batch latency (s)": 5.358541631698609, + "Energy/video (J)": 858.7707249999978, + "Batch latency (s)": 5.359495830535889, "Batch size": 4, "Denoising steps": 25, "Frames": 16 diff --git a/data/diffusion/text-to-video/H100 80GB HBM3/ali-vilab/text-to-video-ms-1.7b/bs8+steps25+frames16.json b/data/diffusion/text-to-video/H100 80GB HBM3/ali-vilab/text-to-video-ms-1.7b/bs8+steps25+frames16.json index ecda3cc..a29c909 100644 --- a/data/diffusion/text-to-video/H100 80GB HBM3/ali-vilab/text-to-video-ms-1.7b/bs8+steps25+frames16.json +++ b/data/diffusion/text-to-video/H100 80GB HBM3/ali-vilab/text-to-video-ms-1.7b/bs8+steps25+frames16.json @@ -1,8 +1,8 @@ { "Model": "ali-vilab/text-to-video-ms-1.7b", "GPU": "NVIDIA H100 80GB HBM3", - "Energy/video (J)": 825.4678125000034, - "Batch latency (s)": 10.00869529247284, + "Energy/video (J)": 824.5901250000112, + "Batch latency (s)": 9.995107746124267, "Batch size": 8, "Denoising steps": 25, "Frames": 16 diff --git a/data/diffusion/text-to-video/H100 80GB HBM3/guoyww/animatediff-motion-adapter-v1-5-3/bs1+steps25+frames16.json b/data/diffusion/text-to-video/H100 80GB HBM3/guoyww/animatediff-motion-adapter-v1-5-3/bs1+steps25+frames16.json index 3e44f5a..f8c65b9 100644 --- a/data/diffusion/text-to-video/H100 80GB HBM3/guoyww/animatediff-motion-adapter-v1-5-3/bs1+steps25+frames16.json +++ b/data/diffusion/text-to-video/H100 80GB HBM3/guoyww/animatediff-motion-adapter-v1-5-3/bs1+steps25+frames16.json @@ -1,8 +1,8 @@ { "Model": "guoyww/animatediff-motion-adapter-v1-5-3", "GPU": "NVIDIA H100 80GB HBM3", - "Energy/video (J)": 3202.3456999999935, - "Batch latency (s)": 5.03039321899414, + "Energy/video (J)": 3220.4186000000686, + "Batch latency (s)": 5.014125680923462, "Batch size": 1, "Denoising steps": 25, "Frames": 16 diff --git a/data/diffusion/text-to-video/H100 80GB HBM3/guoyww/animatediff-motion-adapter-v1-5-3/bs2+steps25+frames16.json b/data/diffusion/text-to-video/H100 80GB HBM3/guoyww/animatediff-motion-adapter-v1-5-3/bs2+steps25+frames16.json index c73e26d..d33b851 100644 --- a/data/diffusion/text-to-video/H100 80GB HBM3/guoyww/animatediff-motion-adapter-v1-5-3/bs2+steps25+frames16.json +++ b/data/diffusion/text-to-video/H100 80GB HBM3/guoyww/animatediff-motion-adapter-v1-5-3/bs2+steps25+frames16.json @@ -1,8 +1,8 @@ { "Model": "guoyww/animatediff-motion-adapter-v1-5-3", "GPU": "NVIDIA H100 80GB HBM3", - "Energy/video (J)": 3080.158299999987, - "Batch latency (s)": 9.498830604553223, + "Energy/video (J)": 3143.508899999969, + "Batch latency (s)": 9.543243718147277, "Batch size": 2, "Denoising steps": 25, "Frames": 16 diff --git a/data/diffusion/text-to-video/H100 80GB HBM3/guoyww/animatediff-motion-adapter-v1-5-3/bs4+steps25+frames16.json b/data/diffusion/text-to-video/H100 80GB HBM3/guoyww/animatediff-motion-adapter-v1-5-3/bs4+steps25+frames16.json index e8f6bc6..b43f88b 100644 --- a/data/diffusion/text-to-video/H100 80GB HBM3/guoyww/animatediff-motion-adapter-v1-5-3/bs4+steps25+frames16.json +++ b/data/diffusion/text-to-video/H100 80GB HBM3/guoyww/animatediff-motion-adapter-v1-5-3/bs4+steps25+frames16.json @@ -1,8 +1,8 @@ { "Model": "guoyww/animatediff-motion-adapter-v1-5-3", "GPU": "NVIDIA H100 80GB HBM3", - "Energy/video (J)": 2999.9867499999937, - "Batch latency (s)": 18.28913300037384, + "Energy/video (J)": 3071.656475000037, + "Batch latency (s)": 18.417469120025636, "Batch size": 4, "Denoising steps": 25, "Frames": 16