Image Utilities (IU)
 All Data Structures Namespaces Functions Variables Typedefs Enumerations Friends Groups Pages
tensor_gpu.h
1 #pragma once
2 
3 #include "lineardevicememory.h"
4 
5 template<typename, int> class ndarray_ref;
6 
7 namespace iu
8 {
12 template<typename PixelType>
13 class TensorGpu: public LinearDeviceMemory<PixelType, 1>
14 {
15 public:
// Memory layout used to access the data elements of the tensor.
// The enumerator names give the dimension order from slowest to fastest varying:
// NCHW = samples, channels, height, width; NHWC = samples, height, width, channels.
22  enum MemoryLayout
23  {
24  NCHW, NHWC
25  };
26 
28  typedef PixelType pixel_type;
29 
// Default constructor: default-constructs the LinearDeviceMemory<PixelType, 1> base,
// sets samples, channels, height and width to 0 and stores the requested layout.
// memoryLayout defaults to NCHW.
33  TensorGpu(MemoryLayout memoryLayout = NCHW) :
34  LinearDeviceMemory<PixelType, 1>(), samples_(0), channels_(0), height_(0), width_(0), memoryLayout_(
35  memoryLayout)
36  {
37  }
38 
// Virtual destructor. The body is empty: this class performs no cleanup of its own here.
40  virtual ~TensorGpu()
41  {
42  }
43 
// Constructs a tensor with the given dimensions.
// The element count N * C * H * W is passed to the LinearDeviceMemory<PixelType, 1>
// base constructor; N, C, H, W and memoryLayout are stored in the matching members.
// memoryLayout defaults to NCHW.
// NOTE(review): N * C * H * W is evaluated in unsigned int and wraps silently on overflow.
51  TensorGpu(const unsigned int N, const unsigned int C, const unsigned int H, const unsigned int W, MemoryLayout memoryLayout = NCHW) :
52  LinearDeviceMemory<PixelType, 1>(N * C * H * W), samples_(N), channels_(C), height_(H), width_(W), memoryLayout_(
53  memoryLayout)
54  {
55  }
56 
// Constructs a tensor from an existing device pointer.
// device_data, the element count N * C * H * W and ext_data_pointer are forwarded
// unchanged to the LinearDeviceMemory<PixelType, 1> base constructor; N, C, H, W and
// memoryLayout are stored in the matching members.
// ext_data_pointer defaults to false, memoryLayout defaults to NCHW.
// NOTE(review): ext_data_pointer presumably selects between wrapping the pointer and
// copying from it -- confirm in lineardevicememory.h.
// NOTE(review): N * C * H * W is evaluated in unsigned int and wraps silently on overflow.
66  TensorGpu(PixelType* device_data, const unsigned int N, const unsigned int C, const unsigned int H,
67  const unsigned int W, bool ext_data_pointer = false, MemoryLayout memoryLayout = NCHW) :
68  LinearDeviceMemory<PixelType, 1>(device_data, N * C * H * W, ext_data_pointer), samples_(N), channels_(C), height_(
69  H), width_(W), memoryLayout_(memoryLayout)
70  {
71  }
72 
// Returns the number of samples (the N passed to the constructor, 0 after default construction).
74  unsigned int samples() const
75  {
76  return samples_;
77  }
78 
// Returns the number of channels (the C passed to the constructor, 0 after default construction).
80  unsigned int channels() const
81  {
82  return channels_;
83  }
84 
// Returns the height (the H passed to the constructor, 0 after default construction).
86  unsigned int height() const
87  {
88  return height_;
89  }
90 
// Returns the width (the W passed to the constructor, 0 after default construction).
92  unsigned int width() const
93  {
94  return width_;
95  }
96 
// Returns the memory layout (NCHW or NHWC) that was stored at construction.
98  MemoryLayout memoryLayout() const
99  {
100  return memoryLayout_;
101  }
102 
// Stream insertion: writes a one-line description of the tensor to out and returns out.
// Fields are written in the order height, width, samples, channels, followed by the
// memory layout and the value of tensor.onDevice().
104  friend std::ostream& operator<<(std::ostream & out,
105  TensorGpu const& tensor)
106  {
107  out << "Tensor: height=" << tensor.height() << " width="
108  << tensor.width() << " samples=" << tensor.samples() << " channels="
109  << tensor.channels();
// The layout text is emitted only for NCHW and NHWC; any other value adds nothing.
110  if (tensor.memoryLayout() == NCHW)
111  out << " memory_layout=NCHW";
112  else if(tensor.memoryLayout() == NHWC)
113  out << " memory_layout=NHWC";
114  out << " onDevice=" << tensor.onDevice();
115  return out;
116  }
117 
// Struct TensorKernelData that can be used in CUDA kernels.
// It holds a raw data pointer, the element count, three strides and the four
// dimensions of a TensorGpu. It is built on the host from a TensorGpu and only
// copies the pointer; nothing in this struct allocates or frees memory.
123  struct TensorKernelData
124  //struct KernelData
125  {
// Pointer to the tensor data, taken from tensor.data().
127  PixelType* data_;
// Number of elements, taken from tensor.numel().
129  unsigned int length_;
// Element offset between consecutive values of the first index (pos0 / dim0).
131  unsigned int stride0;
// Element offset between consecutive values of the second index (pos1 / dim1).
133  unsigned int stride1;
// Element offset between consecutive values of the third index (pos2 / dim2).
135  unsigned int stride2;
136
// Dimensions copied from the tensor. They are stored as unsigned short, so a
// dimension larger than 65535 is truncated by the implicit conversion.
138  unsigned short N;
140  unsigned short C;
142  unsigned short H;
144  unsigned short W;
145
// Returns a reference to the element at
//   pos0 * stride0 + pos1 * stride1 + pos2 * stride2 + pos3.
// With the strides set by the constructor this means (n, c, h, w) for an NCHW
// tensor and (n, h, w, c) for an NHWC tensor. No bounds checking is performed.
153  __device__ PixelType& operator()(short pos0, short pos1, short pos2, short pos3)
154  {
155  return data_[pos0 * stride0 + pos1 * stride1 + pos2 * stride2 + pos3];
156  }
157
// Splits a linear element index into the four indices accepted by operator().
// linearIdx: linear element index; dim0..dim3: output pointers for the indices.
// The remainders are formed by subtraction instead of the modulo operator.
// NOTE(review): every stride is used as a divisor, so a zero stride (for example a
// tensor with a zero-sized dimension) results in a division by zero.
165  __device__ void coords(unsigned int linearIdx, short *dim0, short *dim1, short *dim2, short *dim3)
166  {
167  // modulo is slow
168 // *dim0 = linearIdx / stride0;
169 // *dim1 = (linearIdx % stride0) / stride1;
170 // *dim2 = ((linearIdx % stride0) % stride1) / stride2;
171 // *dim3 = ((linearIdx % stride0) % stride1) % stride2;
172  *dim0 = linearIdx / stride0;
173  *dim1 = (linearIdx - *dim0 * stride0) / stride1;
174  *dim2 = (linearIdx - (*dim0 * stride0 + *dim1 * stride1)) / stride2;
175  *dim3 = linearIdx - (*dim0 * stride0 + *dim1 * stride1 + *dim2 * stride2);
176  }
177
// Host-side constructor: copies pointer, element count and dimensions from tensor
// and derives the strides from its memory layout.
// The strides are zero-initialised first so they never hold indeterminate values
// when the layout matches neither NCHW nor NHWC.
179  __host__ TensorKernelData(const TensorGpu<PixelType> &tensor) :
180  //__host__ KernelData(const TensorGpu<PixelType> &tensor) :
181  data_(const_cast<PixelType*>(tensor.data())), length_(tensor.numel()), stride0(0), stride1(0), stride2(0),
182  N(tensor.samples()), C(tensor.channels()), H(tensor.height()), W(tensor.width())
183  {
184  if (tensor.memoryLayout() == NCHW)
185  {
// NCHW: width varies fastest, then height, then channels, then samples.
186  stride0 = tensor.channels() * tensor.height() * tensor.width();
187  stride1 = tensor.height() * tensor.width();
188  stride2 = tensor.width();
189  }
190  else if (tensor.memoryLayout() == NHWC)
191  {
// NHWC: channels vary fastest, then width, then height, then samples.
192  stride0 = tensor.height() * tensor.width() * tensor.channels();
193  stride1 = tensor.width() * tensor.channels();
194  stride2 = tensor.channels();
195  }
196  }
197  };
198 
201 
204 
205 private:
// Dimensions and layout as passed to the constructors (all dimensions are 0 after
// default construction); returned unchanged by the accessors of the same names.
207  unsigned int samples_;
209  unsigned int channels_;
211  unsigned int height_;
213  unsigned int width_;
215  MemoryLayout memoryLayout_;
216 
217 private:
// Copy constructor and copy assignment are declared private and have no body in this
// listing, so code outside the class cannot copy a TensorGpu.
219  TensorGpu(const TensorGpu&);
221  TensorGpu& operator=(const TensorGpu&);
222 };
223 
224 } // namespace iu
MemoryLayout memoryLayout() const
Definition: tensor_gpu.h:98
__device__ void coords(unsigned int linearIdx, short *dim0, short *dim1, short *dim2, short *dim3)
Definition: tensor_gpu.h:165
unsigned int samples() const
Definition: tensor_gpu.h:74
virtual bool onDevice() const
Definition: lineardevicememory.h:186
PixelType * data(unsigned int offset=0)
Definition: lineardevicememory.h:130
TensorGpu(const unsigned int N, const unsigned int C, const unsigned int H, const unsigned int W, MemoryLayout memoryLayout=NCHW)
Definition: tensor_gpu.h:51
unsigned int width() const
Definition: tensor_gpu.h:92
unsigned short C
Definition: tensor_gpu.h:140
Struct pointer TensorKernelData that can be used in CUDA kernels.
Definition: tensor_gpu.h:123
virtual ~TensorGpu()
Definition: tensor_gpu.h:40
unsigned int numel() const
Definition: linearmemory.h:105
unsigned int height() const
Definition: tensor_gpu.h:86
unsigned short W
Definition: tensor_gpu.h:144
__host__ TensorKernelData(const TensorGpu< PixelType > &tensor)
Definition: tensor_gpu.h:179
TensorGpu(MemoryLayout memoryLayout=NCHW)
Definition: tensor_gpu.h:33
Definition: image_cpu.h:7
unsigned int stride0
Definition: tensor_gpu.h:131
MemoryLayout
Memory layout to access the data elements.
Definition: tensor_gpu.h:22
unsigned short N
Definition: tensor_gpu.h:138
unsigned int channels() const
Definition: tensor_gpu.h:80
TensorGpu(PixelType *device_data, const unsigned int N, const unsigned int C, const unsigned int H, const unsigned int W, bool ext_data_pointer=false, MemoryLayout memoryLayout=NCHW)
Definition: tensor_gpu.h:66
unsigned int length_
Definition: tensor_gpu.h:129
PixelType * data_
Definition: tensor_gpu.h:127
Device 4D tensor class.
Definition: tensor_gpu.h:13
PixelType pixel_type
Definition: tensor_gpu.h:28
unsigned int stride1
Definition: tensor_gpu.h:133
Linear device memory class.
Definition: lineardevicememory.h:18
unsigned short H
Definition: tensor_gpu.h:142
friend std::ostream & operator<<(std::ostream &out, TensorGpu const &tensor)
Definition: tensor_gpu.h:104
unsigned int stride2
Definition: tensor_gpu.h:135
__device__ PixelType & operator()(short pos0, short pos1, short pos2, short pos3)
Definition: tensor_gpu.h:153
ndarray_ref< PixelType, 4 > ref() const