[codex] add video frame count method#195
Conversation
There was a problem hiding this comment.
Code Review
This pull request introduces an asynchronous get_frame_count() method to the VideoSource protocol and its implementations, along with updating VideoSourceProbe to track frame counts. Feedback focuses on performance optimizations: moving the clipped video check to the top of get_frame_count() in VideoFile and VideoBytes to avoid expensive video preparation, and optimizing the frame counting logic in VideoFrameSequence to prevent heavy, unnecessary numpy array conversions.
Important
The consumer version of Gemini Code Assist on GitHub is being sunset. Starting June 18, 2026, new organization installations will be blocked, and all code review activity will officially cease on July 17, 2026.
For more details on the timeline and next steps, please review the Help Documentation.
| async def get_frame_count(self) -> int: | ||
| from refiner.video.remux import prepare_video_source | ||
|
|
||
| prepared = await prepare_video_source(video=self) | ||
| try: | ||
| if self.from_timestamp_s is not None or self.to_timestamp_s is not None: | ||
| raise ValueError( | ||
| "encoded video frame count is unavailable for clipped videos" | ||
| ) | ||
| if prepared.probe is None or prepared.probe.frame_count is None: | ||
| raise ValueError(f"Video frame count is unavailable for {self.uri!r}") | ||
| return prepared.probe.frame_count | ||
| finally: | ||
| prepared.close() |
There was a problem hiding this comment.
The check for clipped videos is performed after the expensive prepare_video_source call. Moving this check to the top of the method avoids unnecessary I/O and container opening overhead when the video is clipped.
| async def get_frame_count(self) -> int: | |
| from refiner.video.remux import prepare_video_source | |
| prepared = await prepare_video_source(video=self) | |
| try: | |
| if self.from_timestamp_s is not None or self.to_timestamp_s is not None: | |
| raise ValueError( | |
| "encoded video frame count is unavailable for clipped videos" | |
| ) | |
| if prepared.probe is None or prepared.probe.frame_count is None: | |
| raise ValueError(f"Video frame count is unavailable for {self.uri!r}") | |
| return prepared.probe.frame_count | |
| finally: | |
| prepared.close() | |
| async def get_frame_count(self) -> int: | |
| if self.from_timestamp_s is not None or self.to_timestamp_s is not None: | |
| raise ValueError( | |
| "encoded video frame count is unavailable for clipped videos" | |
| ) | |
| from refiner.video.remux import prepare_video_source | |
| prepared = await prepare_video_source(video=self) | |
| try: | |
| if prepared.probe is None or prepared.probe.frame_count is None: | |
| raise ValueError(f"Video frame count is unavailable for {self.uri!r}") | |
| return prepared.probe.frame_count | |
| finally: | |
| prepared.close() |
| async def get_frame_count(self) -> int: | ||
| from refiner.video.remux import prepare_video_source | ||
|
|
||
| prepared = await prepare_video_source(video=self) | ||
| try: | ||
| if self.from_timestamp_s is not None or self.to_timestamp_s is not None: | ||
| raise ValueError( | ||
| "encoded video frame count is unavailable for clipped videos" | ||
| ) | ||
| if prepared.probe is None or prepared.probe.frame_count is None: | ||
| source = self.uri or type(self).__name__ | ||
| raise ValueError(f"Video frame count is unavailable for {source!r}") | ||
| return prepared.probe.frame_count | ||
| finally: | ||
| prepared.close() |
There was a problem hiding this comment.
The check for clipped videos is performed after the expensive prepare_video_source call. Moving this check to the top of the method avoids unnecessary I/O and container opening overhead when the video is clipped.
| async def get_frame_count(self) -> int: | |
| from refiner.video.remux import prepare_video_source | |
| prepared = await prepare_video_source(video=self) | |
| try: | |
| if self.from_timestamp_s is not None or self.to_timestamp_s is not None: | |
| raise ValueError( | |
| "encoded video frame count is unavailable for clipped videos" | |
| ) | |
| if prepared.probe is None or prepared.probe.frame_count is None: | |
| source = self.uri or type(self).__name__ | |
| raise ValueError(f"Video frame count is unavailable for {source!r}") | |
| return prepared.probe.frame_count | |
| finally: | |
| prepared.close() | |
| async def get_frame_count(self) -> int: | |
| if self.from_timestamp_s is not None or self.to_timestamp_s is not None: | |
| raise ValueError( | |
| "encoded video frame count is unavailable for clipped videos" | |
| ) | |
| from refiner.video.remux import prepare_video_source | |
| prepared = await prepare_video_source(video=self) | |
| try: | |
| if prepared.probe is None or prepared.probe.frame_count is None: | |
| source = self.uri or type(self).__name__ | |
| raise ValueError(f"Video frame count is unavailable for {source!r}") | |
| return prepared.probe.frame_count | |
| finally: | |
| prepared.close() |
| async def get_frame_count(self) -> int: | ||
| if self.frame_count is not None: | ||
| return self.frame_count | ||
| return sum(1 for _ in self.iter_frame_arrays()) |
There was a problem hiding this comment.
Using sum(1 for _ in self.iter_frame_arrays()) is highly inefficient because iter_frame_arrays() performs heavy numpy array conversions, dimension checks, clipping, and contiguous memory copies for every single frame. We can optimize this by checking if the underlying frames sequence is sized (e.g., list, tuple, array) to compute the count in O(1) time, or by iterating over the raw frames directly without numpy conversion overhead.
async def get_frame_count(self) -> int:
if self.frame_count is not None:
return self.frame_count
source = self.frames
frames = (
cast(Callable[[], Iterable[Any]], source)() if callable(source) else source
)
start_idx = (
0
if self.from_timestamp_s is None
else max(0, int(math.floor(float(self.from_timestamp_s) * self.fps)))
)
if isinstance(frames, Sequence) or hasattr(frames, "__len__"):
total_len = len(frames)
if self.to_timestamp_s is not None:
end_idx = max(
start_idx, int(math.ceil(float(self.to_timestamp_s) * self.fps))
)
end_idx = min(total_len, end_idx)
else:
end_idx = total_len
return max(0, end_idx - start_idx)
if self.to_timestamp_s is not None:
end_idx = max(
start_idx, int(math.ceil(float(self.to_timestamp_s) * self.fps))
)
else:
end_idx = None
count = 0
for index, _ in enumerate(frames):
if index < start_idx:
continue
if end_idx is not None and index >= end_idx:
break
count += 1
return count
Purpose
Add an explicit async
VideoSource.get_frame_count()API so callers can request a video frame count without makingrow.num_framesor a property hide IO.Changes
get_frame_count()to theVideoSourceprotocol and concrete video sources.VideoSourceProbewithframe_countfrom PyAV stream metadata.VideoFileandVideoBytesreturn container-reported frame counts and raise when unavailable or clipped.Validation
uv run pytest tests/test_video_decode.py tests/readers/test_zarr_reader.py tests/pipeline/test_lerobot_sink.py tests/pipeline/test_sinks.pyuv run ruff check --force-exclude src/refiner/video/types.py src/refiner/pipeline/utils/cache/decoder_cache.py tests/test_video_decode.py tests/readers/test_zarr_reader.pyuv run ty check