Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
55 commits
Select commit Hold shift + click to select a range
a4db20b
add ltoir test support
abhilash1910 Nov 5, 2025
fb6cfb3
add options for multi-modules
abhilash1910 Nov 25, 2025
7aaed4e
add tests
abhilash1910 Nov 25, 2025
64c7f7d
add bitcode test
abhilash1910 Nov 25, 2025
42ba301
[pre-commit.ci] auto code formatting
pre-commit-ci[bot] Nov 25, 2025
7ca6899
fix format
abhilash1910 Nov 25, 2025
0674ea1
[pre-commit.ci] auto code formatting
pre-commit-ci[bot] Nov 25, 2025
03b1224
refresh
abhilash1910 Nov 26, 2025
033f11c
apply bitcode file from cupy_test helpers
abhilash1910 Dec 1, 2025
6e411ee
use 2 tuples
abhilash1910 Dec 1, 2025
b4c21db
Merge branch 'main' into nvvm_enhance
abhilash1910 Dec 2, 2025
aeb26aa
refresh
abhilash1910 Dec 3, 2025
b3d6d96
format
abhilash1910 Dec 3, 2025
edd6401
[pre-commit.ci] auto code formatting
pre-commit-ci[bot] Dec 3, 2025
d53e00b
Merge branch 'main' into nvvm_enhance
abhilash1910 Dec 7, 2025
8dbbafe
fix from upstream
abhilash1910 Dec 15, 2025
0174bb8
Merge branch 'main' into nvvm_enhance
abhilash1910 Dec 16, 2025
b78f0c3
refresh from upstream
abhilash1910 Dec 17, 2025
99a5593
fix tests
abhilash1910 Dec 17, 2025
783f6e5
take path_finder from PR 447
abhilash1910 Dec 17, 2025
5dbfb2d
add builder files
abhilash1910 Dec 17, 2025
0a9eea9
use python lists/tuples
abhilash1910 Dec 17, 2025
79138c0
libdevice integration
abhilash1910 Dec 18, 2025
25d336c
refresh
abhilash1910 Dec 19, 2025
32c1913
refresh
abhilash1910 Dec 19, 2025
01f03e5
refresh
abhilash1910 Dec 19, 2025
9a5d5fe
use cuda_pathfinder module for libdevice
abhilash1910 Dec 19, 2025
07c6199
rebase
abhilash1910 Feb 5, 2026
e1b19cc
rebase
abhilash1910 Feb 5, 2026
f89aac8
Merge branch 'main' into nvvm_enhance
abhilash1910 Feb 5, 2026
0ad13ae
Merge branch 'main' into nvvm_enhance
abhilash1910 Feb 10, 2026
dcdd100
tests
brandon-b-miller Feb 6, 2026
aca2e36
Address reviews
brandon-b-miller Feb 11, 2026
af6e70a
put libdevice stuff under _static_libs
brandon-b-miller Feb 11, 2026
b1d423f
refresh reviews
abhilash1910 Feb 11, 2026
4cedbb7
change program to cython per PR 1565
abhilash1910 Feb 11, 2026
d9aed9b
Merge branch 'main' into nvvm_enhance
abhilash1910 Feb 11, 2026
ca32d2b
fix import
abhilash1910 Feb 11, 2026
fac1907
fix tests
abhilash1910 Feb 12, 2026
4a01e06
fix ruff check
abhilash1910 Feb 12, 2026
2d5252f
ruff fix find_libdevice
abhilash1910 Feb 12, 2026
2976c24
[pre-commit.ci] auto code formatting
pre-commit-ci[bot] Feb 12, 2026
61c1e00
add spdx and copyright
abhilash1910 Feb 12, 2026
c6bea0c
rm redundant include and fix test
abhilash1910 Feb 12, 2026
b7866cf
[pre-commit.ci] auto code formatting
pre-commit-ci[bot] Feb 12, 2026
4283230
refresh tests
abhilash1910 Feb 12, 2026
78f4328
add correct libdevice for CTK> 13
abhilash1910 Feb 12, 2026
ddf4839
[pre-commit.ci] auto code formatting
pre-commit-ci[bot] Feb 12, 2026
cd3644e
revamp design of pathfinder as LocatedHeaderDir
abhilash1910 Feb 13, 2026
68b33a2
refresh
abhilash1910 Feb 13, 2026
631a113
[pre-commit.ci] auto code formatting
pre-commit-ci[bot] Feb 13, 2026
7a02aee
fix mypy errirs
abhilash1910 Feb 13, 2026
b8f2eb0
fix base var declaration
abhilash1910 Feb 13, 2026
434b3c9
format changes
abhilash1910 Feb 13, 2026
9f0a319
format changes
abhilash1910 Feb 13, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion cuda_bindings/cuda/bindings/_internal/nvjitlink_linux.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@ from .utils import FunctionNotFoundError, NotSupportedError

from cuda.pathfinder import load_nvidia_dynamic_lib


###############################################################################
# Extern
###############################################################################
Expand Down
1 change: 0 additions & 1 deletion cuda_bindings/cuda/bindings/_internal/nvvm_linux.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@ from .utils import FunctionNotFoundError, NotSupportedError

from cuda.pathfinder import load_nvidia_dynamic_lib


###############################################################################
# Extern
###############################################################################
Expand Down
2 changes: 2 additions & 0 deletions cuda_core/cuda/core/_program.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,5 @@ cdef class Program:
object _linker # Linker
object _options # ProgramOptions
object __weakref__
bint _use_libdevice # Flag for libdevice loading
int _module_count
90 changes: 87 additions & 3 deletions cuda_core/cuda/core/_program.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -356,6 +356,8 @@ class ProgramOptions:
pch_verbose: bool | None = None
pch_messages: bool | None = None
instantiate_templates_in_pch: bool | None = None
extra_sources: list[tuple[str, str | bytes | bytearray]] | tuple[tuple[str, str | bytes | bytearray], ...] | None = None
use_libdevice: bool | None = None # For libdevice execution
numba_debug: bool | None = None # Custom option for Numba debugging

def __post_init__(self):
Expand Down Expand Up @@ -458,6 +460,11 @@ def _get_nvvm_module():
_nvvm_module = None
raise e

def _find_libdevice_path():
"""Find libdevice*.bc for NVVM compilation using cuda.pathfinder."""
from cuda.pathfinder import get_libdevice_path
return get_libdevice_path()


cdef inline bint _process_define_macro_inner(list options, object macro) except? -1:
"""Process a single define macro, returning True if successful."""
Expand Down Expand Up @@ -520,12 +527,20 @@ cdef inline int Program_init(Program self, object code, str code_type, object op
cdef const char* code_ptr
cdef const char* name_ptr
cdef size_t code_len
cdef bytes module_bytes
cdef const char* module_ptr
cdef size_t module_len

self._options = options = check_or_create_options(ProgramOptions, options, "Program options")
code_type = code_type.lower()
self._module_count = 0
self._use_libdevice = False

if code_type == "c++":
assert_type(code, str)
if options.extra_sources is not None:
raise ValueError("extra_sources is not supported by the NVRTC backend (C++ code_type)")

# TODO: support pre-loaded headers & include names
code_bytes = code.encode()
code_ptr = <const char*>code_bytes
Expand All @@ -540,6 +555,8 @@ cdef inline int Program_init(Program self, object code, str code_type, object op

elif code_type == "ptx":
assert_type(code, str)
if options.extra_sources is not None:
raise ValueError("extra_sources is not supported by the PTX backend.")
self._linker = Linker(
ObjectCode._init(code.encode(), code_type), options=_translate_program_options(options)
)
Expand All @@ -561,6 +578,54 @@ cdef inline int Program_init(Program self, object code, str code_type, object op
self._h_nvvm = create_nvvm_program_handle(nvvm_prog) # RAII from here
with nogil:
HANDLE_RETURN_NVVM(nvvm_prog, cynvvm.nvvmAddModuleToProgram(nvvm_prog, code_ptr, code_len, name_ptr))
self._module_count = 1

# Add extra modules if provided
if options.extra_sources is not None:
if not is_sequence(options.extra_sources):
raise TypeError(
"extra_sources must be a sequence of 2-tuples: ((name1, source1), (name2, source2), ...)"
)
for i, module in enumerate(options.extra_sources):
if not isinstance(module, tuple) or len(module) != 2:
raise TypeError(
f"Each extra module must be a 2-tuple (name, source)"
f", got {type(module).__name__} at index {i}"
)

module_name, module_source = module

if not isinstance(module_name, str):
raise TypeError(f"Module name at index {i} must be a string, got {type(module_name).__name__}")

if isinstance(module_source, str):
# Textual LLVM IR - encode to UTF-8 bytes
module_source = module_source.encode("utf-8")
elif not isinstance(module_source, (bytes, bytearray)):
raise TypeError(
f"Module source at index {i} must be str (textual LLVM IR), bytes (textual LLVM IR or bitcode), "
f"or bytearray, got {type(module_source).__name__}"
)

if len(module_source) == 0:
raise ValueError(f"Module source for '{module_name}' (index {i}) cannot be empty")
Comment on lines +583 to +611
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not a blocker, we can handle it in the next PR. The option validation should be moved to under ProgramOptions.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I will take this up in the subsequent PR change.


# Add the module using NVVM API
module_bytes = module_source if isinstance(module_source, bytes) else bytes(module_source)
module_ptr = <const char*>module_bytes
module_len = len(module_bytes)
module_name_bytes = module_name.encode()
module_name_ptr = <const char*>module_name_bytes

with nogil:
HANDLE_RETURN_NVVM(nvvm_prog, cynvvm.nvvmAddModuleToProgram(
nvvm_prog, module_ptr, module_len, module_name_ptr))
self._module_count += 1

# Store use_libdevice flag
if options.use_libdevice:
self._use_libdevice = True

self._backend = "NVVM"
self._linker = None

Expand Down Expand Up @@ -649,19 +714,38 @@ cdef object Program_compile_nvvm(Program self, str target_type, object logs):
cdef size_t logsize = 0
cdef vector[const char*] options_vec
cdef char* data_ptr = NULL

cdef bytes libdevice_bytes
cdef const char* libdevice_ptr
cdef size_t libdevice_len
# Build options array
options_list = self._options.as_bytes("nvvm", target_type)
options_vec.resize(len(options_list))
for i in range(len(options_list)):
options_vec[i] = <const char*>(<bytes>options_list[i])

# Compile
with nogil:
HANDLE_RETURN_NVVM(prog, cynvvm.nvvmVerifyProgram(prog, <int>options_vec.size(), options_vec.data()))

# Load libdevice if requested - following numba-cuda
if self._use_libdevice:
libdevice_path = _find_libdevice_path()
if libdevice_path is None:
raise RuntimeError(
"use_libdevice=True but could not find libdevice.10.bc. "
"Ensure CUDA toolkit is installed."
)
with open(libdevice_path, "rb") as f:
libdevice_bytes = f.read()
libdevice_ptr = <const char*>libdevice_bytes
libdevice_len = len(libdevice_bytes)
# Use lazy_add_module
with nogil:
HANDLE_RETURN_NVVM(prog, cynvvm.nvvmLazyAddModuleToProgram(
prog, libdevice_ptr, libdevice_len, NULL))

with nogil:
HANDLE_RETURN_NVVM(prog, cynvvm.nvvmCompileProgram(prog, <int>options_vec.size(), options_vec.data()))

# Get compiled result
HANDLE_RETURN_NVVM(prog, cynvvm.nvvmGetCompiledResultSize(prog, &output_size))
data = bytearray(output_size)
data_ptr = <char*>(<bytearray>data)
Expand Down
183 changes: 177 additions & 6 deletions cuda_core/tests/test_program.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
from cuda.core._module import Kernel, ObjectCode
from cuda.core._program import Program, ProgramOptions
from cuda.core._utils.cuda_utils import CUDAError, driver, handle_return
from cuda_python_test_helpers.nvvm_bitcode import minimal_nvvmir # noqa: F401, F811

cuda_driver_version = handle_return(driver.cuDriverGetVersion())
is_culink_backend = _linker._decide_nvjitlink_or_driver()
Expand Down Expand Up @@ -444,11 +445,23 @@ def test_nvvm_compile_invalid_target(nvvm_ir):


@nvvm_available
@pytest.mark.parametrize("target_type", ["ptx", "ltoir"])
@pytest.mark.parametrize(
"options",
[
ProgramOptions(name="test1", arch="sm_90", device_code_optimize=False),
ProgramOptions(name="test2", arch="sm_100", device_code_optimize=False),
ProgramOptions(name="test3", arch="sm_100", link_time_optimization=True),
ProgramOptions(
name="test4",
arch="sm_90",
ftz=True,
prec_sqrt=False,
prec_div=False,
fma=True,
device_code_optimize=True,
link_time_optimization=True,
),
pytest.param(
ProgramOptions(name="test_sm110_1", arch="sm_110", device_code_optimize=False),
marks=pytest.mark.skipif(
Expand Down Expand Up @@ -480,22 +493,180 @@ def test_nvvm_compile_invalid_target(nvvm_ir):
),
],
)
def test_nvvm_program_options(init_cuda, nvvm_ir, options):
"""Test NVVM programs with different options"""
def test_nvvm_program_options(init_cuda, nvvm_ir, options, target_type):
"""Test NVVM programs with different options and target types (ptx/ltoir)"""
program = Program(nvvm_ir, "nvvm", options)
assert program.backend == "NVVM"

result = program.compile(target_type)
assert isinstance(result, ObjectCode)
assert result.name == options.name

code_content = result.code
assert len(code_content) > 0

if target_type == "ptx":
ptx_text = code_content.decode() if isinstance(code_content, bytes) else str(code_content)
assert ".visible .entry simple(" in ptx_text

program.close()


@nvvm_available
def test_nvvm_program_with_single_extra_source(nvvm_ir):
"""Test NVVM program with a single extra source"""
from cuda.core._program import _get_nvvm_module

nvvm = _get_nvvm_module()
major, minor, debug_major, debug_minor = nvvm.ir_version()
# helper nvvm ir for multiple module loading
helper_nvvmir = f"""target triple = "nvptx64-unknown-cuda"
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-i128:128:128-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64"

define i32 @helper_add(i32 %x) {{
entry:
%result = add i32 %x, 1
ret i32 %result
}}

!nvvmir.version = !{{!0}}
!0 = !{{i32 {major}, i32 {minor}, i32 {debug_major}, i32 {debug_minor}}}
""" # noqa: E501

options = ProgramOptions(
name="multi_module_test",
extra_sources=[
("helper", helper_nvvmir),
],
)
program = Program(nvvm_ir, "nvvm", options)

assert program.backend == "NVVM"

ptx_code = program.compile("ptx")
assert isinstance(ptx_code, ObjectCode)
assert ptx_code.name == "multi_module_test"

program.close()


@nvvm_available
def test_nvvm_program_with_multiple_extra_sources():
"""Test NVVM program with multiple extra sources"""
from cuda.core._program import _get_nvvm_module

nvvm = _get_nvvm_module()
major, minor, debug_major, debug_minor = nvvm.ir_version()

main_nvvm_ir = f"""target triple = "nvptx64-unknown-cuda"
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-i128:128:128-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64"

declare i32 @helper_add(i32) nounwind readnone
declare i32 @helper_mul(i32) nounwind readnone

define void @main_kernel(i32* %data) {{
entry:
%tid = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
%ptr = getelementptr inbounds i32, i32* %data, i32 %tid
%val = load i32, i32* %ptr, align 4

%val1 = call i32 @helper_add(i32 %val)
%val2 = call i32 @helper_mul(i32 %val1)

store i32 %val2, i32* %ptr, align 4
ret void
}}

declare i32 @llvm.nvvm.read.ptx.sreg.tid.x() nounwind readnone

!nvvm.annotations = !{{!0}}
!0 = !{{void (i32*)* @main_kernel, !"kernel", i32 1}}

!nvvmir.version = !{{!1}}
!1 = !{{i32 {major}, i32 {minor}, i32 {debug_major}, i32 {debug_minor}}}
""" # noqa: E501

helper1_ir = f"""target triple = "nvptx64-unknown-cuda"
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-i128:128:128-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64"

define i32 @helper_add(i32 %x) nounwind readnone {{
entry:
%result = add i32 %x, 1
ret i32 %result
}}

!nvvmir.version = !{{!0}}
!0 = !{{i32 {major}, i32 {minor}, i32 {debug_major}, i32 {debug_minor}}}
""" # noqa: E501

helper2_ir = f"""target triple = "nvptx64-unknown-cuda"
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-i128:128:128-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64"

define i32 @helper_mul(i32 %x) nounwind readnone {{
entry:
%result = mul i32 %x, 2
ret i32 %result
}}

!nvvmir.version = !{{!0}}
!0 = !{{i32 {major}, i32 {minor}, i32 {debug_major}, i32 {debug_minor}}}
""" # noqa: E501

options = ProgramOptions(
name="nvvm_multi_helper_test",
extra_sources=[
("helper1", helper1_ir),
("helper2", helper2_ir),
],
)
program = Program(main_nvvm_ir, "nvvm", options)

assert program.backend == "NVVM"
ptx_code = program.compile("ptx")
assert isinstance(ptx_code, ObjectCode)
assert ptx_code.name == options.name
assert ptx_code.name == "nvvm_multi_helper_test"

code_content = ptx_code.code
ptx_text = code_content.decode() if isinstance(code_content, bytes) else str(code_content)
assert ".visible .entry simple(" in ptx_text
ltoir_code = program.compile("ltoir")
assert isinstance(ltoir_code, ObjectCode)
assert ltoir_code.name == "nvvm_multi_helper_test"

program.close()


@nvvm_available
def test_bitcode_format(minimal_nvvmir): # noqa: F811
if len(minimal_nvvmir) < 4:
pytest.skip("Bitcode file is not valid or empty")

options = ProgramOptions(name="minimal_nvvmir_bitcode_test", arch="sm_90")
program = Program(minimal_nvvmir, "nvvm", options)

assert program.backend == "NVVM"
ptx_result = program.compile("ptx")
assert isinstance(ptx_result, ObjectCode)
assert ptx_result.name == "minimal_nvvmir_bitcode_test"
assert len(ptx_result.code) > 0
program_lto = Program(minimal_nvvmir, "nvvm", options)
try:
ltoir_result = program_lto.compile("ltoir")
assert isinstance(ltoir_result, ObjectCode)
assert len(ltoir_result.code) > 0
print(f"LTOIR size: {len(ltoir_result.code)} bytes")
except Exception as e:
print(f"LTOIR compilation failed : {e}")
finally:
program.close()


def test_cpp_program_with_extra_sources():
# negative test with NVRTC with multiple sources
code = 'extern "C" __global__ void my_kernel(){}'
helper = 'extern "C" __global__ void helper(){}'
options = ProgramOptions(extra_sources=helper)
with pytest.raises(ValueError, match="extra_sources is not supported by the NVRTC backend"):
Program(code, "c++", options)


def test_program_options_as_bytes_nvrtc():
"""Test ProgramOptions.as_bytes() for NVRTC backend"""
options = ProgramOptions(arch="sm_80", debug=True, lineinfo=True, ftz=True)
Expand Down
Loading