unilab.ipc.replay_pipelines.transfer.cuda_like.CudaLikeReplayTransferBackend

class unilab.ipc.replay_pipelines.transfer.cuda_like.CudaLikeReplayTransferBackend[source]

Bases: object

Transfer backend for host-to-device copies from pinned host memory to a CUDA-like device.

PyTorch ROCm exposes the same torch.cuda surface for runtime streams and events, so this backend intentionally keys on the PyTorch device type instead of NVIDIA-specific platform names.

Parameters:

device (keyword-only)

ring_depth (keyword-only)

Methods

__init__(*, device, ring_depth)

allocate_device_slots(*, count, shape, dtype)

clear_ready(slot)

close()

ready_query(slot)

register_host_slots(slots)

submit_h2d(*, slot, dst, src, metadata, ...)

synchronize_ready(slot)

wait_current_stream_for_ready(slot)

Attributes

host_memory_kind = 'registered_pinned_shared'
supports_async_submit = True
supports_timing_events = True
__init__(*, device, ring_depth)[source]
Parameters:

device

ring_depth

register_host_slots(slots)[source]
Parameters:

slots (list[Tensor])

Return type:

None

allocate_device_slots(*, count, shape, dtype)[source]
Parameters:

count

shape

dtype

Return type:

list[Tensor]

submit_h2d(*, slot, dst, src, metadata, trace_recorder, trace_cuda_events, h2d_bytes, pack_layout, pack_executor)[source]
Parameters:

slot

dst

src

metadata

trace_recorder

trace_cuda_events

h2d_bytes

pack_layout

pack_executor

Return type:

float

clear_ready(slot)[source]
Parameters:

slot (int)

Return type:

None

ready_query(slot)[source]
Parameters:

slot (int)

Return type:

bool

synchronize_ready(slot)[source]
Parameters:

slot (int)

Return type:

None

wait_current_stream_for_ready(slot)[source]
Parameters:

slot (int)

Return type:

None

close()[source]
Return type:

None