diff --git a/benchmarks/assets/openx-favicon.png b/benchmarks/assets/openx-favicon.png new file mode 100644 index 0000000..e0c2c7b Binary files /dev/null and b/benchmarks/assets/openx-favicon.png differ diff --git a/benchmarks/assets/openx-textmark.png b/benchmarks/assets/openx-textmark.png new file mode 100644 index 0000000..ac07893 Binary files /dev/null and b/benchmarks/assets/openx-textmark.png differ diff --git a/benchmarks/prompts/openx-intro-15s.json b/benchmarks/prompts/openx-intro-15s.json new file mode 100644 index 0000000..911b8a1 --- /dev/null +++ b/benchmarks/prompts/openx-intro-15s.json @@ -0,0 +1,29 @@ +{ + "title": "OpenX Flow — 15-Second Product Intro", + "description": "A benchmark demonstration: 3 scenes explaining OpenX Flow, generated from a brand image using Wan 2.2 I2V on Modal A100.", + "resolution": "480p", + "fps": 16, + "clip_duration": 5, + "model": "Wan2.2-T2V-14B", + "scenes": [ + { + "id": 1, + "type": "i2v", + "prompt": "A cinematic tech logo reveal. The OpenX Flow logo emerges from a dark void with a subtle glow, particles of light floating around it. The camera slowly zooms in. Dark moody lighting, professional product video style, high quality, 4K render.", + "first_frame": "assets/openx-favicon.png", + "duration": 5 + }, + { + "id": 2, + "type": "t2v", + "prompt": "A futuristic holographic interface displaying a video editing timeline with multiple scenes. AI-generated video clips appear one by one in the timeline. Blue and green holographic glow, dark background, cinematic tech product demo, smooth camera movement.", + "duration": 5 + }, + { + "id": 3, + "type": "t2v", + "prompt": "A grid of AI-generated video thumbnails being automatically published to social media platforms. Icons of YouTube, TikTok animate onto the screen. A pipeline visualization shows data flowing from left to right. 
Dark tech aesthetic, professional motion graphics style.", + "duration": 5 + } + ] +} diff --git a/benchmarks/results/benchmark-results.json b/benchmarks/results/benchmark-results.json new file mode 100644 index 0000000..089bc9c --- /dev/null +++ b/benchmarks/results/benchmark-results.json @@ -0,0 +1,17 @@ +{ + "model": "Wan2.2-TI2V-5B", + "gpu": "A100-40GB", + "resolution": "832x480", + "num_frames": 81, + "fps": 16, + "inference_steps": 30, + "type": "i2v", + "prompt": "A cinematic tech logo reveal. The OpenX Flow logo emerges from a dark void with a subtle glow, particles of light floating around it. The camera slowly zooms in. Dark moody lighting, professional product video style, high quality, 4K render.", + "model_load_time_s": 225.6, + "generation_time_s": 54.1, + "export_time_s": 1.9, + "total_gpu_time_s": 281.6, + "wall_clock_time_s": 289.3, + "video_size_kb": 241.7, + "estimated_cost_usd": 0.1527 +} \ No newline at end of file diff --git a/benchmarks/results/samples/scene_1_i2v.mp4 b/benchmarks/results/samples/scene_1_i2v.mp4 new file mode 100644 index 0000000..7743efd Binary files /dev/null and b/benchmarks/results/samples/scene_1_i2v.mp4 differ diff --git a/benchmarks/run_benchmark.py b/benchmarks/run_benchmark.py new file mode 100644 index 0000000..7e7da09 --- /dev/null +++ b/benchmarks/run_benchmark.py @@ -0,0 +1,188 @@ +""" +OpenX Flow Benchmark — Wan 2.2 TI2V-5B on Modal A100. + +Generates a video clip from a brand image + prompt. +Records timing and saves the output. 
# Usage:
#     modal run benchmarks/run_benchmark.py

import json
import time
from pathlib import Path

import modal

# Single source of truth for the benchmark configuration. These values feed the
# pipeline call, the console banner and the saved metrics, so the three can no
# longer drift apart.
MODEL_NAME = "Wan2.2-TI2V-5B"
MODEL_REPO = "Wan-AI/Wan2.2-TI2V-5B-Diffusers"
GPU_TYPE = "A100-40GB"
WIDTH = 832
HEIGHT = 480
NUM_FRAMES = 81
FPS = 16
INFERENCE_STEPS = 30
GUIDANCE_SCALE = 5.0
# Hourly rate used for the cost estimate.
# NOTE(review): assumed to be the A100-40GB price — confirm against current pricing.
GPU_USD_PER_HOUR = 1.90

app = modal.App("openx-flow-benchmark")

wan_image = (
    modal.Image.debian_slim(python_version="3.11")
    .pip_install(
        "torch==2.6.0",
        "torchvision",
        "diffusers>=0.33.0",
        "transformers>=4.49.0",
        "accelerate>=1.4.0",
        "sentencepiece",
        "imageio[ffmpeg]",
        "Pillow",
        "numpy",
        "ftfy",
    )
)


@app.function(
    image=wan_image,
    gpu=GPU_TYPE,
    timeout=900,
    memory=32768,
)
def generate_clip(prompt: str, first_frame_bytes: bytes | None = None) -> dict:
    """Generate a single clip with Wan 2.2 TI2V-5B and encode it as MP4.

    Args:
        prompt: Text prompt passed to the pipeline.
        first_frame_bytes: Encoded image used as the conditioning frame. When
            falsy (``None`` or empty) the text-to-video pipeline is used instead
            of the image-to-video pipeline.

    Returns:
        A dict with ``model_load_time``, ``generation_time``, ``export_time``
        and ``total_time`` (seconds, rounded to 0.1), ``num_frames``,
        ``video_size_kb`` and the raw ``video_bytes``.
    """
    import io

    import imageio
    import numpy as np
    import torch
    from PIL import Image

    # perf_counter is monotonic, so the intervals cannot be skewed by
    # wall-clock adjustments during a multi-minute run.
    t_start = time.perf_counter()

    # Imported after t_start so the import cost is counted as load time.
    from diffusers import WanImageToVideoPipeline, WanPipeline

    pipeline_cls = WanImageToVideoPipeline if first_frame_bytes else WanPipeline
    # NOTE(review): upstream Wan examples appear to load in bfloat16 — confirm
    # that float16 is intended here.
    pipe = pipeline_cls.from_pretrained(MODEL_REPO, torch_dtype=torch.float16)
    pipe.to("cuda")

    call_kwargs = {
        "prompt": prompt,
        "num_frames": NUM_FRAMES,
        "guidance_scale": GUIDANCE_SCALE,
        "num_inference_steps": INFERENCE_STEPS,
        "height": HEIGHT,
        "width": WIDTH,
    }
    if first_frame_bytes:
        # resize() stretches to the target size; aspect ratio is not preserved.
        call_kwargs["image"] = (
            Image.open(io.BytesIO(first_frame_bytes))
            .convert("RGB")
            .resize((WIDTH, HEIGHT))
        )
    t_load = time.perf_counter()

    output = pipe(**call_kwargs)
    t_gen = time.perf_counter()

    # Export frames to mp4. The context manager closes the writer even when a
    # frame fails to encode.
    frames = output.frames[0]
    buf = io.BytesIO()
    with imageio.get_writer(buf, format="mp4", fps=FPS, codec="libx264") as writer:
        for frame in frames:
            writer.append_data(np.array(frame))
    video_bytes = buf.getvalue()

    t_end = time.perf_counter()

    return {
        "model_load_time": round(t_load - t_start, 1),
        "generation_time": round(t_gen - t_load, 1),
        "export_time": round(t_end - t_gen, 1),
        "total_time": round(t_end - t_start, 1),
        # Report what was actually produced rather than what was requested.
        "num_frames": len(frames),
        "video_size_kb": round(len(video_bytes) / 1024, 1),
        "video_bytes": video_bytes,
    }


@app.local_entrypoint()
def main():
    """Run benchmark: 1 scene to prove the pipeline works.

    Reads the first scene from the prompts file, runs it remotely through
    ``generate_clip``, then writes the clip to ``benchmarks/results/samples``
    and the timing metrics to ``benchmarks/results/benchmark-results.json``.
    Paths are relative to the current working directory.
    """
    prompts_file = Path("benchmarks/prompts/openx-intro-15s.json")
    results_dir = Path("benchmarks/results")
    samples_dir = results_dir / "samples"
    samples_dir.mkdir(parents=True, exist_ok=True)

    with open(prompts_file, encoding="utf-8") as f:
        config = json.load(f)

    # Only the first scene is benchmarked; the config's top-level fields
    # (model, resolution, fps, ...) are not consulted.
    scene = config["scenes"][0]

    print("🎬 OpenX Flow Benchmark")
    print(f" Model: {MODEL_NAME}")
    print(f" Resolution: {WIDTH}x{HEIGHT}")
    print(f" Frames: {NUM_FRAMES} ({NUM_FRAMES // FPS}s @ {FPS}fps)")
    print(f" Steps: {INFERENCE_STEPS}")
    print(f" Scene: {scene['type'].upper()}")
    print(f" Prompt: {scene['prompt'][:60]}...")
    print()

    first_frame_bytes = None
    if scene.get("first_frame"):
        frame_path = Path("benchmarks") / scene["first_frame"]
        try:
            first_frame_bytes = frame_path.read_bytes()
        except FileNotFoundError:
            # Keep the best-effort fallback, but make it visible: without the
            # image the run is text-to-video, not image-to-video.
            print(f" WARNING: first frame not found: {frame_path} (falling back to T2V)")
        else:
            print(f" First frame: {frame_path} ({len(first_frame_bytes)//1024}KB)")

    # The mode that will actually run, which may differ from scene["type"].
    mode = "i2v" if first_frame_bytes else "t2v"

    print()
    print("⏳ Starting generation (this includes cold start + model download)...")
    t_wall_start = time.perf_counter()

    result = generate_clip.remote(
        prompt=scene["prompt"],
        first_frame_bytes=first_frame_bytes,
    )

    wall_time = round(time.perf_counter() - t_wall_start, 1)

    # Save video (scene 1 in i2v mode keeps the original "scene_1_i2v.mp4" name).
    video_path = samples_dir / f"scene_{scene.get('id', 1)}_{mode}.mp4"
    video_path.write_bytes(result["video_bytes"])

    # Save metrics
    metrics = {
        "model": MODEL_NAME,
        "gpu": GPU_TYPE,
        "resolution": f"{WIDTH}x{HEIGHT}",
        "num_frames": result["num_frames"],
        "fps": FPS,
        "inference_steps": INFERENCE_STEPS,
        "type": mode,
        "prompt": scene["prompt"],
        "model_load_time_s": result["model_load_time"],
        "generation_time_s": result["generation_time"],
        "export_time_s": result["export_time"],
        "total_gpu_time_s": result["total_time"],
        "wall_clock_time_s": wall_time,
        "video_size_kb": result["video_size_kb"],
        # Billed on wall-clock time, so cold start and download are included.
        "estimated_cost_usd": round(wall_time / 3600 * GPU_USD_PER_HOUR, 4),
    }

    metrics_path = results_dir / "benchmark-results.json"
    with open(metrics_path, "w", encoding="utf-8") as f:
        json.dump(metrics, f, indent=2)

    print(f"✓ Video saved: {video_path} ({result['video_size_kb']:.0f} KB)")
    print()
    print("📊 Results:")
    print(f" Model load: {result['model_load_time']}s")
    print(f" Generation: {result['generation_time']}s")
    print(f" Export: {result['export_time']}s")
    print(f" Total (GPU): {result['total_time']}s")
    print(f" Wall clock: {wall_time}s")
    print(f" Est. cost: ${metrics['estimated_cost_usd']:.4f}")