34 namespace device_memory {
    44   size_t bytes = 
sizeof(T) * count;
    46   cudaError_t cuda_error = cudaMalloc((
void**)&ptr, bytes);
    47   if (cuda_error != cudaSuccess) {
    58     cudaError_t cuda_error = (cudaFree(ptr));
    59     if (cuda_error != cudaSuccess) {
    70 void copy(T* dst, T 
const* src, 
size_t count, cudaMemcpyKind kind) {
    72   if (bytes == 0 && count > 0)
    74   cudaError_t cuda_error = (cudaMemcpy(dst, src, bytes, kind));
    75   if (cuda_error != cudaSuccess) {
    82   copy(dst, src, count, cudaMemcpyHostToDevice);
    87   copy(dst, src, count, cudaMemcpyDeviceToHost);
    92   copy(dst, src, count, cudaMemcpyDeviceToDevice);
    97   copy(dst, src, count, cudaMemcpyHostToHost);
   101 template <
typename OutputIterator, 
typename T>
   102 void insert_to_host(OutputIterator begin, OutputIterator end, T 
const* device_begin) {
   103   size_t elements = end - begin;
   108 template <
typename T, 
typename InputIterator>
   110   size_t elements = end - begin;
   119 template <
typename T>
   124       cudaError_t cuda_error = (cudaFree(ptr));
   125       if (cuda_error != cudaSuccess) {
   162   T* 
get() 
const { 
return smart_ptr.
get(); }
   177   void reset(T* _ptr, 
size_t _capacity) {
   178     smart_ptr.
reset(_ptr);
   179     capacity = _capacity;
 Definition: aligned_buffer.h:35
allocation(size_t _capacity)
Constructor: allocates capacity elements on the current CUDA device. 
Definition: device_memory.h:151
void insert_to_device(T *device_begin, InputIterator begin, InputIterator end)
Copies elements to device memory from host-side range. 
Definition: device_memory.h:109
deleter & get_deleter()
Returns the deleter object which would be used for destruction of the managed object. 
Definition: device_memory.h:186
void copy_to_device(T *dst, T const *src, size_t count=1)
Definition: device_memory.h:81
void copy(T *dst, T const *src, size_t count, cudaMemcpyKind kind)
Definition: device_memory.h:70
void operator()(T *ptr)
Definition: device_memory.h:123
T * get() const 
Returns a pointer to the managed object. 
Definition: device_memory.h:162
void reset()
Deletes the managed object and resets capacity to zero. 
Definition: device_memory.h:171
Delete functor for CUDA device memory. 
Definition: device_memory.h:122
T * release()
Releases the ownership of the managed object (without deleting) and resets capacity to zero.
Definition: device_memory.h:165
T * allocate(size_t count=1)
Allocate a buffer of count elements of type T on the current CUDA device. 
Definition: device_memory.h:42
platform::unique_ptr< T, deleter > smart_ptr
Smart pointer. 
Definition: device_memory.h:141
size_t capacity
Number of elements of T allocated on the current CUDA device. 
Definition: device_memory.h:138
Defines the size of an element in bits. 
Definition: numeric_types.h:42
void copy_host_to_host(T *dst, T const *src, size_t count=1)
Definition: device_memory.h:96
C++ exception semantics for CUDA error codes. 
Top-level include for all CUTLASS numeric types. 
T * operator->() const 
Returns a pointer to the object owned by *this. 
Definition: device_memory.h:183
void copy_to_host(T *dst, T const *src, size_t count=1)
Definition: device_memory.h:86
~allocation()
Destructor. 
Definition: device_memory.h:159
const deleter & get_deleter() const 
Returns the deleter object which would be used for destruction of the managed object (const).
Definition: device_memory.h:189
C++ exception wrapper for CUDA cudaError_t. 
Definition: exceptions.h:36
allocation & operator=(allocation const &p)
Copies a device-side memory allocation. 
Definition: device_memory.h:192
allocation()
Constructor: allocates no memory. 
Definition: device_memory.h:148
void reset(T *_ptr, size_t _capacity)
Deletes managed object, if owned, and replaces its reference with a given pointer and capacity.
Definition: device_memory.h:177
void free(T *ptr)
Free the buffer pointed to by ptr. 
Definition: device_memory.h:56
void insert_to_host(OutputIterator begin, OutputIterator end, T const *device_begin)
Copies elements from device memory to host-side range. 
Definition: device_memory.h:102
Device allocation abstraction that tracks size and capacity. 
Definition: device_memory.h:120
allocation(allocation const &p)
Copy constructor. 
Definition: device_memory.h:154
void copy_device_to_device(T *dst, T const *src, size_t count=1)
Definition: device_memory.h:91