Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
94 changes: 56 additions & 38 deletions av/video/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -374,9 +374,13 @@ class PictureType(IntEnum):
BI = lib.AV_PICTURE_TYPE_BI # BI type


_is_big_endian = cython.declare(cython.bint, sys.byteorder == "big")


@cython.cfunc
@cython.inline
def byteswap_array(array, big_endian: cython.bint):
    """
    Return ``array`` in host byte order.

    :param array: object exposing a ``byteswap()`` method (presumably a
        numpy array; confirm against callers).
    :param big_endian: whether the data held in ``array`` is big-endian.
    :return: ``array.byteswap()`` when the data's endianness differs from the
        host's, otherwise ``array`` itself, untouched.
    """
    # `_is_big_endian` is the module-level flag computed once from
    # `sys.byteorder`, so the host check is not repeated on every call.
    if _is_big_endian != big_endian:
        return array.byteswap()
    return array

Expand Down Expand Up @@ -429,23 +433,31 @@ def copy_array_to_plane(array, plane: VideoPlane, bytes_per_pixel: cython.uint):


@cython.cfunc
@cython.inline
def useful_array(
    plane: VideoPlane, bytes_per_pixel: cython.uint = 1, dtype: str = "uint8"
):
    """
    Return the useful part of the VideoPlane as a strided array.

    We are simply creating a view that discards any padding which was added for
    alignment.

    :param plane: the plane to expose; used directly as the array's buffer.
    :param bytes_per_pixel: number of bytes one pixel occupies in this plane.
    :param dtype: numpy dtype name of a single component.
    :return: an array of shape ``(height, width)`` when a pixel holds exactly
        one ``dtype`` item, otherwise ``(height, width, channels)`` where
        ``channels = bytes_per_pixel // itemsize``.
    """
    import numpy as np

    dtype_obj = np.dtype(dtype)
    # Row pitch in bytes, including any alignment padding after the pixels.
    # NOTE(review): abs() drops the sign of a negative linesize; confirm that
    # bottom-up frames never reach this function.
    total_line_size = abs(plane.frame.ptr.linesize[plane.index])
    itemsize = dtype_obj.itemsize
    channels = bytes_per_pixel // itemsize

    # Strides are in bytes: stepping one row skips the whole padded line, so
    # the padding is never part of the resulting array.
    if channels == 1:
        shape = (plane.height, plane.width)
        strides = (total_line_size, itemsize)
    else:
        shape = (plane.height, plane.width, channels)
        strides = (total_line_size, bytes_per_pixel, itemsize)

    return np.ndarray(shape, dtype=dtype_obj, buffer=plane, strides=strides)


@cython.cfunc
Expand Down Expand Up @@ -527,6 +539,8 @@ def planes(self):
plane_count: cython.int = 0
while plane_count < max_plane_count and self.ptr.extended_data[plane_count]:
plane_count += 1
if plane_count == 1:
return (VideoPlane(self, 0),)
return tuple([VideoPlane(self, i) for i in range(plane_count)])

@property
Expand Down Expand Up @@ -744,49 +758,50 @@ def to_ndarray(self, channel_last=False, **kwargs):

# check size
format_name = frame.format.name
height, width = frame.ptr.height, frame.ptr.width
planes: tuple[VideoPlane, ...] = frame.planes
if format_name in {"yuv420p", "yuvj420p", "yuyv422", "yuv422p10le", "yuv422p"}:
assert width % 2 == 0, "the width has to be even for this pixel format"
assert height % 2 == 0, "the height has to be even for this pixel format"

# cases planes are simply concatenated in shape (height, width, channels)
if format_name in _np_pix_fmt_dtypes:
if format_name == "yuyv422":
assert frame.ptr.width % 2 == 0, "width has to be even for yuyv422"
assert frame.ptr.height % 2 == 0, "height has to be even for yuyv422"
itemsize: cython.uint
itemsize, dtype = _np_pix_fmt_dtypes[format_name]
if len(planes) == 1: # shortcut, avoid memory copy
array = useful_array(planes[0], itemsize, dtype).reshape(
height, width, -1
)
num_planes: cython.size_t = len(planes)
if num_planes == 1: # shortcut, avoid memory copy
array = useful_array(planes[0], itemsize, dtype)
else: # general case
array = np.empty((height, width, len(planes)), dtype=dtype)
for i, plane in enumerate(planes):
array[:, :, i] = useful_array(plane, itemsize, dtype).reshape(
height, width
)
array = np.empty(
(frame.ptr.height, frame.ptr.width, num_planes), dtype=dtype
)
if format_name.startswith("gbr"):
plane_indices = (2, 0, 1, *range(3, num_planes))
else:
plane_indices = range(num_planes)
for i, p_idx in enumerate(plane_indices):
array[:, :, i] = useful_array(planes[p_idx], itemsize, dtype)
array = byteswap_array(array, format_name.endswith("be"))
if array.shape[2] == 1: # skip last channel for gray images
return array.squeeze(2)
if format_name.startswith("gbr"): # gbr -> rgb
array[:, :, :3] = array[:, :, [2, 0, 1]]
if not channel_last and format_name in {"yuv444p", "yuvj444p"}:
array = np.moveaxis(array, 2, 0)
return array

# special cases
if format_name in {"yuv420p", "yuvj420p", "yuv422p"}:
assert frame.ptr.width % 2 == 0, "width has to be even for this format"
assert frame.ptr.height % 2 == 0, "height has to be even for this format"
return np.hstack(
[
useful_array(planes[0]),
useful_array(planes[1]),
useful_array(planes[2]),
useful_array(planes[0]).reshape(-1),
useful_array(planes[1]).reshape(-1),
useful_array(planes[2]).reshape(-1),
]
).reshape(-1, width)
).reshape(-1, frame.ptr.width)
if format_name == "yuv422p10le":
assert frame.ptr.width % 2 == 0, "width has to be even for this format"
assert frame.ptr.height % 2 == 0, "height has to be even for this format"
# Read planes as uint16 at their original width
y = useful_array(planes[0], 2, "uint16").reshape(height, width)
u = useful_array(planes[1], 2, "uint16").reshape(height, width // 2)
v = useful_array(planes[2], 2, "uint16").reshape(height, width // 2)
y = useful_array(planes[0], 2, "uint16")
u = useful_array(planes[1], 2, "uint16")
v = useful_array(planes[2], 2, "uint16")

# Double the width of U and V by repeating each value
u_full = np.repeat(u, 2, axis=1)
Expand All @@ -795,7 +810,7 @@ def to_ndarray(self, channel_last=False, **kwargs):
return np.stack([y, u_full, v_full], axis=2)
return np.stack([y, u_full, v_full], axis=0)
if format_name == "pal8":
image = useful_array(planes[0]).reshape(height, width)
image = useful_array(planes[0])
palette = (
np.frombuffer(planes[1], "i4")
.astype(">i4")
Expand All @@ -805,8 +820,11 @@ def to_ndarray(self, channel_last=False, **kwargs):
return image, palette
if format_name == "nv12":
return np.hstack(
[useful_array(planes[0]), useful_array(planes[1], 2)]
).reshape(-1, width)
[
useful_array(planes[0]).reshape(-1),
useful_array(planes[1], 2).reshape(-1),
]
).reshape(-1, frame.ptr.width)

raise ValueError(
f"Conversion to numpy array with format `{format_name}` is not yet supported"
Expand Down
2 changes: 1 addition & 1 deletion av/video/plane.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ def __cinit__(self, frame: VideoFrame, index: cython.int):
frames_ctx.sw_format, frame.ptr.width, frame.ptr.height
)

if fmt.name == "pal8" and index == 1:
if index == 1 and fmt.name == "pal8":
self.width = 256
self.height = 1
self.buffer_size = 256 * 4
Expand Down
Loading