From e2ad2fb28875893fd1a412d48891ec17bbfa4bc8 Mon Sep 17 00:00:00 2001 From: Lukas Geiger Date: Wed, 11 Mar 2026 15:52:24 +0000 Subject: [PATCH 1/4] Prevent data copy in `VideoFrame.to_ndarray()` for padded frames --- av/video/frame.py | 56 ++++++++++++++++++++++++++++------------------- 1 file changed, 33 insertions(+), 23 deletions(-) diff --git a/av/video/frame.py b/av/video/frame.py index 74be1128d..30c16b0e0 100644 --- a/av/video/frame.py +++ b/av/video/frame.py @@ -433,19 +433,26 @@ def useful_array( plane: VideoPlane, bytes_per_pixel: cython.uint = 1, dtype: str = "uint8" ): """ - Return the useful part of the VideoPlane as a single dimensional array. + Return the useful part of the VideoPlane as a strided array. - We are simply discarding any padding which was added for alignment. + We are simply creating a view that discards any padding which was added for + alignment. """ import numpy as np - total_line_size: cython.size_t = abs(plane.line_size) - useful_line_size: cython.size_t = plane.width * bytes_per_pixel - if total_line_size == useful_line_size: - return np.frombuffer(plane, dtype=dtype) - arr = np.frombuffer(plane, np.uint8) - arr = arr.reshape(-1, total_line_size)[:, 0:useful_line_size].reshape(-1) - return arr.view(np.dtype(dtype)) + dtype_obj = np.dtype(dtype) + total_line_size = abs(plane.frame.ptr.linesize[plane.index]) + itemsize = dtype_obj.itemsize + channels = bytes_per_pixel // itemsize + + if channels == 1: + shape = (plane.height, plane.width) + strides = (total_line_size, itemsize) + else: + shape = (plane.height, plane.width, channels) + strides = (total_line_size, bytes_per_pixel, itemsize) + + return np.ndarray(shape, dtype=dtype_obj, buffer=plane, strides=strides) @cython.cfunc @@ -755,19 +762,19 @@ def to_ndarray(self, channel_last=False, **kwargs): itemsize: cython.uint itemsize, dtype = _np_pix_fmt_dtypes[format_name] if len(planes) == 1: # shortcut, avoid memory copy - array = useful_array(planes[0], itemsize, dtype).reshape( - height, width, -1 - ) + array = useful_array(planes[0], itemsize, dtype) + if array.ndim == 2: + array = array[:, :, None] else: # general case array = np.empty((height, width, len(planes)), dtype=dtype) for i, plane in enumerate(planes): - array[:, :, i] = useful_array(plane, itemsize, dtype).reshape( - height, width - ) + array[:, :, i] = useful_array(plane, itemsize, dtype) array = byteswap_array(array, format_name.endswith("be")) if array.shape[2] == 1: # skip last channel for gray images return array.squeeze(2) if format_name.startswith("gbr"): # gbr -> rgb + if len(planes) == 1: + array = array.copy() # prevent mutating FFmpeg frame in-place array[:, :, :3] = array[:, :, [2, 0, 1]] if not channel_last and format_name in {"yuv444p", "yuvj444p"}: array = np.moveaxis(array, 2, 0) @@ -777,16 +784,16 @@ def to_ndarray(self, channel_last=False, **kwargs): if format_name in {"yuv420p", "yuvj420p", "yuv422p"}: return np.hstack( [ - useful_array(planes[0]), - useful_array(planes[1]), - useful_array(planes[2]), + useful_array(planes[0]).reshape(-1), + useful_array(planes[1]).reshape(-1), + useful_array(planes[2]).reshape(-1), ] ).reshape(-1, width) if format_name == "yuv422p10le": # Read planes as uint16 at their original width - y = useful_array(planes[0], 2, "uint16").reshape(height, width) - u = useful_array(planes[1], 2, "uint16").reshape(height, width // 2) - v = useful_array(planes[2], 2, "uint16").reshape(height, width // 2) + y = useful_array(planes[0], 2, "uint16") + u = useful_array(planes[1], 2, "uint16") + v = useful_array(planes[2], 2, "uint16") # Double the width of U and V by repeating each value u_full = np.repeat(u, 2, axis=1) @@ -795,7 +802,7 @@ def to_ndarray(self, channel_last=False, **kwargs): return np.stack([y, u_full, v_full], axis=2) return np.stack([y, u_full, v_full], axis=0) if format_name == "pal8": - image = useful_array(planes[0]).reshape(height, width) + image = useful_array(planes[0]) palette = ( np.frombuffer(planes[1], "i4") .astype(">i4") @@ -805,7 +812,10 @@ def to_ndarray(self, channel_last=False, **kwargs): return image, palette if format_name == "nv12": return np.hstack( - [useful_array(planes[0]), useful_array(planes[1], 2)] + [ + useful_array(planes[0]).reshape(-1), + useful_array(planes[1], 2).reshape(-1), + ] ).reshape(-1, width) raise ValueError( From b105f6e745b89568c50f1702827dcb7c0e0cd64e Mon Sep 17 00:00:00 2001 From: Lukas Geiger Date: Wed, 11 Mar 2026 18:22:55 +0000 Subject: [PATCH 2/4] Simplify special cases --- av/video/frame.py | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/av/video/frame.py b/av/video/frame.py index 30c16b0e0..6f3e2e6dc 100644 --- a/av/video/frame.py +++ b/av/video/frame.py @@ -374,9 +374,13 @@ class PictureType(IntEnum): BI = lib.AV_PICTURE_TYPE_BI # BI type +_is_big_endian = cython.declare(cython.bint, sys.byteorder == "big") + + @cython.cfunc +@cython.inline def byteswap_array(array, big_endian: cython.bint): - if (sys.byteorder == "big") != big_endian: + if _is_big_endian != big_endian: return array.byteswap() return array @@ -761,21 +765,18 @@ def to_ndarray(self, channel_last=False, **kwargs): if format_name in _np_pix_fmt_dtypes: itemsize: cython.uint itemsize, dtype = _np_pix_fmt_dtypes[format_name] - if len(planes) == 1: # shortcut, avoid memory copy + num_planes: cython.size_t = len(planes) + if num_planes == 1: # shortcut, avoid memory copy array = useful_array(planes[0], itemsize, dtype) - if array.ndim == 2: - array = array[:, :, None] else: # general case - array = np.empty((height, width, len(planes)), dtype=dtype) - for i, plane in enumerate(planes): - array[:, :, i] = useful_array(plane, itemsize, dtype) + array = np.empty((height, width, num_planes), dtype=dtype) + if format_name.startswith("gbr"): + plane_indices = (2, 0, 1, *range(3, num_planes)) + else: + plane_indices = range(num_planes) + for i, p_idx in enumerate(plane_indices): + array[:, :, i] = useful_array(planes[p_idx], itemsize, dtype) array = byteswap_array(array, format_name.endswith("be")) - if array.shape[2] == 1: # skip last channel for gray images - return array.squeeze(2) - if format_name.startswith("gbr"): # gbr -> rgb - if len(planes) == 1: - array = array.copy() # prevent mutating FFmpeg frame in-place - array[:, :, :3] = array[:, :, [2, 0, 1]] if not channel_last and format_name in {"yuv444p", "yuvj444p"}: array = np.moveaxis(array, 2, 0) return array From cd5ffd0994b37069f14db0787b2b503e5dd4da5c Mon Sep 17 00:00:00 2001 From: Lukas Geiger Date: Wed, 11 Mar 2026 18:41:04 +0000 Subject: [PATCH 3/4] Move duplicated if statements --- av/video/frame.py | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/av/video/frame.py b/av/video/frame.py index 6f3e2e6dc..5a7ba9d07 100644 --- a/av/video/frame.py +++ b/av/video/frame.py @@ -433,6 +433,7 @@ def copy_array_to_plane(array, plane: VideoPlane, bytes_per_pixel: cython.uint): @cython.cfunc +@cython.inline def useful_array( plane: VideoPlane, bytes_per_pixel: cython.uint = 1, dtype: str = "uint8" ): @@ -755,21 +756,21 @@ def to_ndarray(self, channel_last=False, **kwargs): # check size format_name = frame.format.name - height, width = frame.ptr.height, frame.ptr.width planes: tuple[VideoPlane, ...] = frame.planes - if format_name in {"yuv420p", "yuvj420p", "yuyv422", "yuv422p10le", "yuv422p"}: - assert width % 2 == 0, "the width has to be even for this pixel format" - assert height % 2 == 0, "the height has to be even for this pixel format" - # cases planes are simply concatenated in shape (height, width, channels) if format_name in _np_pix_fmt_dtypes: + if format_name == "yuyv422": + assert frame.ptr.width % 2 == 0, "width has to be even for yuyv422" + assert frame.ptr.height % 2 == 0, "height has to be even for yuyv422" itemsize: cython.uint itemsize, dtype = _np_pix_fmt_dtypes[format_name] num_planes: cython.size_t = len(planes) if num_planes == 1: # shortcut, avoid memory copy array = useful_array(planes[0], itemsize, dtype) else: # general case - array = np.empty((height, width, num_planes), dtype=dtype) + array = np.empty( + (frame.ptr.height, frame.ptr.width, num_planes), dtype=dtype + ) if format_name.startswith("gbr"): plane_indices = (2, 0, 1, *range(3, num_planes)) else: @@ -783,14 +784,18 @@ def to_ndarray(self, channel_last=False, **kwargs): # special cases if format_name in {"yuv420p", "yuvj420p", "yuv422p"}: + assert frame.ptr.width % 2 == 0, "width has to be even for this format" + assert frame.ptr.height % 2 == 0, "height has to be even for this format" return np.hstack( [ useful_array(planes[0]).reshape(-1), useful_array(planes[1]).reshape(-1), useful_array(planes[2]).reshape(-1), ] - ).reshape(-1, width) + ).reshape(-1, frame.ptr.width) if format_name == "yuv422p10le": + assert frame.ptr.width % 2 == 0, "width has to be even for this format" + assert frame.ptr.height % 2 == 0, "height has to be even for this format" # Read planes as uint16 at their original width y = useful_array(planes[0], 2, "uint16") u = useful_array(planes[1], 2, "uint16") @@ -817,7 +822,7 @@ def to_ndarray(self, channel_last=False, **kwargs): useful_array(planes[0]).reshape(-1), useful_array(planes[1], 2).reshape(-1), ] - ).reshape(-1, width) + ).reshape(-1, frame.ptr.width) raise ValueError( f"Conversion to numpy array with format `{format_name}` is not yet supported" From 02ec746c425bff6fe36a6e774614d6d01654e570 Mon Sep 17 00:00:00 2001 From: Lukas Geiger Date: Wed, 11 Mar 2026 18:55:20 +0000 Subject: [PATCH 4/4] Optimize planes --- av/video/frame.py | 2 ++ av/video/plane.py | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/av/video/frame.py b/av/video/frame.py index 5a7ba9d07..5212db292 100644 --- a/av/video/frame.py +++ b/av/video/frame.py @@ -539,6 +539,8 @@ def planes(self): plane_count: cython.int = 0 while plane_count < max_plane_count and self.ptr.extended_data[plane_count]: plane_count += 1 + if plane_count == 1: + return (VideoPlane(self, 0),) return tuple([VideoPlane(self, i) for i in range(plane_count)]) @property diff --git a/av/video/plane.py b/av/video/plane.py index c4908c21d..2b169f9cc 100644 --- a/av/video/plane.py +++ b/av/video/plane.py @@ -26,7 +26,7 @@ def __cinit__(self, frame: VideoFrame, index: cython.int): frames_ctx.sw_format, frame.ptr.width, frame.ptr.height ) - if fmt.name == "pal8" and index == 1: + if index == 1 and fmt.name == "pal8": self.width = 256 self.height = 1 self.buffer_size = 256 * 4