Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file added benchmarks/assets/openx-favicon.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added benchmarks/assets/openx-textmark.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
29 changes: 29 additions & 0 deletions benchmarks/prompts/openx-intro-15s.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
{
"title": "OpenX Flow — 15-Second Product Intro",
"description": "A benchmark demonstration: 3 scenes explaining OpenX Flow, generated from a brand image using Wan 2.2 I2V on Modal A100.",
"resolution": "480p",
"fps": 16,
"clip_duration": 5,
"model": "Wan2.2-T2V-14B",
"scenes": [
{
"id": 1,
"type": "i2v",
"prompt": "A cinematic tech logo reveal. The OpenX Flow logo emerges from a dark void with a subtle glow, particles of light floating around it. The camera slowly zooms in. Dark moody lighting, professional product video style, high quality, 4K render.",
"first_frame": "assets/openx-favicon.png",
"duration": 5
},
{
"id": 2,
"type": "t2v",
"prompt": "A futuristic holographic interface displaying a video editing timeline with multiple scenes. AI-generated video clips appear one by one in the timeline. Blue and green holographic glow, dark background, cinematic tech product demo, smooth camera movement.",
"duration": 5
},
{
"id": 3,
"type": "t2v",
"prompt": "A grid of AI-generated video thumbnails being automatically published to social media platforms. Icons of YouTube, TikTok animate onto the screen. A pipeline visualization shows data flowing from left to right. Dark tech aesthetic, professional motion graphics style.",
"duration": 5
}
]
}
17 changes: 17 additions & 0 deletions benchmarks/results/benchmark-results.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
{
"model": "Wan2.2-TI2V-5B",
"gpu": "A100-40GB",
"resolution": "832x480",
"num_frames": 81,
"fps": 16,
"inference_steps": 30,
"type": "i2v",
"prompt": "A cinematic tech logo reveal. The OpenX Flow logo emerges from a dark void with a subtle glow, particles of light floating around it. The camera slowly zooms in. Dark moody lighting, professional product video style, high quality, 4K render.",
"model_load_time_s": 225.6,
"generation_time_s": 54.1,
"export_time_s": 1.9,
"total_gpu_time_s": 281.6,
"wall_clock_time_s": 289.3,
"video_size_kb": 241.7,
"estimated_cost_usd": 0.1527
}
Binary file added benchmarks/results/samples/scene_1_i2v.mp4
Binary file not shown.
188 changes: 188 additions & 0 deletions benchmarks/run_benchmark.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,188 @@
"""
OpenX Flow Benchmark — Wan 2.2 TI2V-5B on Modal A100.

Generates a video clip from a brand image + prompt.
Records timing and saves the output.

Usage:
modal run benchmarks/run_benchmark.py
"""

import json
import time
from pathlib import Path

import modal

# Modal application under which the remote benchmark function is registered.
app = modal.App("openx-flow-benchmark")

# Python packages installed into the container image used for generation.
# torch is pinned exactly; the remaining packages only carry lower bounds
# (or no constraint at all).
_WAN_PIP_PACKAGES = (
    "torch==2.6.0",
    "torchvision",
    "diffusers>=0.33.0",
    "transformers>=4.49.0",
    "accelerate>=1.4.0",
    "sentencepiece",
    "imageio[ffmpeg]",
    "Pillow",
    "numpy",
    "ftfy",
)

# Debian-slim / Python 3.11 base image with the dependencies above installed.
_base_image = modal.Image.debian_slim(python_version="3.11")
wan_image = _base_image.pip_install(*_WAN_PIP_PACKAGES)


@app.function(
    image=wan_image,
    gpu="A100-40GB",
    timeout=900,
    memory=32768,
)
def generate_clip(prompt: str, first_frame_bytes: bytes | None = None) -> dict:
    """Generate a single clip with Wan 2.2 TI2V-5B and return it with timings.

    When ``first_frame_bytes`` is non-empty the clip is produced with
    ``WanImageToVideoPipeline`` (image-to-video), using the decoded image
    resized to 832x480 as conditioning. Otherwise ``WanPipeline``
    (text-to-video) is used with an explicit 832x480 output size.

    Args:
        prompt: Text prompt passed to the pipeline.
        first_frame_bytes: Encoded image bytes for the first frame, or
            ``None`` / empty to run text-to-video.

    Returns:
        A dict with:
            model_load_time: seconds from start until the pipeline is on the
                GPU (includes importing diffusers and decoding the image).
            generation_time: seconds spent in the pipeline call.
            export_time: seconds spent encoding the frames to MP4.
            total_time: seconds from start to end of export.
            num_frames: number of frames actually encoded.
            video_size_kb: size of the encoded MP4 in KiB.
            video_bytes: the encoded MP4 (libx264, 16 fps).
        All times are rounded to one decimal place.
    """
    import io

    import imageio
    import numpy as np
    import torch
    from PIL import Image

    model_id = "Wan-AI/Wan2.2-TI2V-5B-Diffusers"
    width, height = 832, 480
    fps = 16

    t_start = time.time()

    # Imported after t_start so the import cost is counted as load time.
    from diffusers import WanImageToVideoPipeline, WanPipeline

    # Sampling arguments shared by both pipelines; kept in one place so the
    # I2V and T2V paths cannot drift apart.
    call_kwargs = {
        "prompt": prompt,
        "num_frames": 81,
        "guidance_scale": 5.0,
        "num_inference_steps": 30,
    }

    if first_frame_bytes:
        # Decode the conditioning image *before* loading the model so that a
        # corrupt image fails fast instead of after the expensive model load.
        first_frame = Image.open(io.BytesIO(first_frame_bytes))
        call_kwargs["image"] = first_frame.convert("RGB").resize((width, height))
        pipeline_cls = WanImageToVideoPipeline
    else:
        call_kwargs["height"] = height
        call_kwargs["width"] = width
        pipeline_cls = WanPipeline

    # NOTE(review): float16 is kept from the original benchmark run; the
    # diffusers Wan examples appear to use bfloat16 — confirm before changing,
    # since it would alter the published numbers.
    pipe = pipeline_cls.from_pretrained(model_id, torch_dtype=torch.float16)
    pipe.to("cuda")
    t_load = time.time()

    output = pipe(**call_kwargs)
    t_gen = time.time()

    # Export frames to an in-memory MP4. The context manager guarantees the
    # writer is closed even if a frame fails to encode.
    frames = output.frames[0]
    buf = io.BytesIO()
    with imageio.get_writer(buf, format="mp4", fps=fps, codec="libx264") as writer:
        for frame in frames:
            writer.append_data(np.array(frame))
    video_bytes = buf.getvalue()

    t_end = time.time()

    return {
        "model_load_time": round(t_load - t_start, 1),
        "generation_time": round(t_gen - t_load, 1),
        "export_time": round(t_end - t_gen, 1),
        "total_time": round(t_end - t_start, 1),
        # Report what was actually encoded rather than the requested count.
        "num_frames": len(frames),
        "video_size_kb": round(len(video_bytes) / 1024, 1),
        "video_bytes": video_bytes,
    }


@app.local_entrypoint()
def main():
    """Run the benchmark for the first scene of the prompt file.

    Reads ``benchmarks/prompts/openx-intro-15s.json``, sends the first scene
    to ``generate_clip`` on Modal, then writes the returned MP4 to
    ``benchmarks/results/samples/`` and the timing metrics to
    ``benchmarks/results/benchmark-results.json``. Paths are relative to the
    current working directory.

    If the scene names a ``first_frame`` file that does not exist, the run
    falls back to text-to-video; a warning is printed and the metrics and
    output file name record the mode that actually ran.
    """
    prompts_file = Path("benchmarks/prompts/openx-intro-15s.json")
    results_dir = Path("benchmarks/results")
    samples_dir = results_dir / "samples"
    samples_dir.mkdir(parents=True, exist_ok=True)

    # The prompt file contains non-ASCII text, so do not rely on the
    # platform default encoding.
    with open(prompts_file, encoding="utf-8") as f:
        config = json.load(f)

    scene = config["scenes"][0]

    print("🎬 OpenX Flow Benchmark")
    print(" Model: Wan2.2-TI2V-5B")
    print(" Resolution: 832x480")
    print(" Frames: 81 (5s @ 16fps)")
    print(" Steps: 30")
    print(f" Scene: {scene['type'].upper()}")
    print(f" Prompt: {scene['prompt'][:60]}...")
    print()

    first_frame_bytes = None
    first_frame = scene.get("first_frame")
    if first_frame:
        frame_path = Path("benchmarks") / first_frame
        try:
            first_frame_bytes = frame_path.read_bytes()
        except FileNotFoundError:
            # Keep the best-effort fallback, but make it visible.
            print(f" WARNING: first frame not found: {frame_path}; running T2V instead")
        else:
            print(f" First frame: {frame_path} ({len(first_frame_bytes)//1024}KB)")

    # Mirrors the truthiness check generate_clip uses to pick its pipeline,
    # so the recorded mode is the one that really ran.
    mode = "i2v" if first_frame_bytes else "t2v"

    print()
    print("⏳ Starting generation (this includes cold start + model download)...")
    t_wall_start = time.time()

    result = generate_clip.remote(
        prompt=scene["prompt"],
        first_frame_bytes=first_frame_bytes,
    )

    t_wall_end = time.time()
    wall_time = round(t_wall_end - t_wall_start, 1)

    # Save video; the name resolves to "scene_1_i2v.mp4" for the shipped config.
    video_path = samples_dir / f"scene_{scene.get('id', 1)}_{mode}.mp4"
    video_path.write_bytes(result["video_bytes"])

    # Hourly rate used for the cost estimate.
    # NOTE(review): presumably the A100-40GB price at the time — confirm.
    gpu_usd_per_hour = 1.90

    # Save metrics
    metrics = {
        "model": "Wan2.2-TI2V-5B",
        "gpu": "A100-40GB",
        "resolution": "832x480",
        "num_frames": 81,
        "fps": 16,
        "inference_steps": 30,
        "type": mode,
        "prompt": scene["prompt"],
        "model_load_time_s": result["model_load_time"],
        "generation_time_s": result["generation_time"],
        "export_time_s": result["export_time"],
        "total_gpu_time_s": result["total_time"],
        "wall_clock_time_s": wall_time,
        "video_size_kb": result["video_size_kb"],
        "estimated_cost_usd": round(wall_time / 3600 * gpu_usd_per_hour, 4),
    }

    metrics_path = results_dir / "benchmark-results.json"
    with open(metrics_path, "w", encoding="utf-8") as f:
        json.dump(metrics, f, indent=2)

    print(f"✓ Video saved: {video_path} ({result['video_size_kb']:.0f} KB)")
    print()
    print("📊 Results:")
    print(f" Model load: {result['model_load_time']}s")
    print(f" Generation: {result['generation_time']}s")
    print(f" Export: {result['export_time']}s")
    print(f" Total (GPU): {result['total_time']}s")
    print(f" Wall clock: {wall_time}s")
    print(f" Est. cost: ${metrics['estimated_cost_usd']:.4f}")
Loading