DLPrimitives
context.hpp
1 #pragma once
2 #include <dlprim/opencl_include.hpp>
3 #include <dlprim/definitions.hpp>
4 #include <chrono>
5 #include <stack>
6 #include <map>
7 #include <memory>
8 
9 namespace dlprim {
10 
16 class TimingData {
17 public:
18  bool cpu_only=false;
19  typedef std::chrono::high_resolution_clock clock_type;
20  typedef std::chrono::time_point<clock_type> time_point_type;
21 
22  struct Section {
23  Section(char const *n) : name(n) {}
24  Section(char const *n,time_point_type t) : name(n), start(t) {}
25  char const *name="unknown";
26  time_point_type start;
27  double time_sec;
28  int parent = -1;
29  };
30 
31  struct Data {
32  cl::Event event;
33  char const *name = nullptr;
34  int index = -1;
35  int section = -1;
36  };
37 
38  void enter(char const *name)
39  {
40  sections_.push_back(Section(name,std::chrono::high_resolution_clock::now()));
41  if(!sids_.empty())
42  sections_.back().parent = sids_.top();
43  sids_.push(sections_.size() - 1);
44  }
45  void leave()
46  {
47  int sid = sids_.top();
48  auto now = clock_type::now();
49  auto diff = now - sections_[sid].start;
50  sections_[sid].time_sec = std::chrono::duration_cast<std::chrono::duration<double> >(diff).count();
51  sids_.pop();
52  }
53 
54  void reset()
55  {
56  sections_.clear();
57  while(!sids_.empty())
58  sids_.pop();
59  events_.clear();
60  }
61 
62  std::shared_ptr<Data> add_event(char const *name,int index=-1,cl::Event *ev = nullptr)
63  {
64  std::shared_ptr<Data> e(new Data());
65  if(ev)
66  e->event = *ev;
67  if(!sids_.empty())
68  e->section = sids_.top();
69 
70  e->name = name;
71  e->index = index;
72  events_.push_back(e);
73  return e;
74  }
75 
76  std::vector<Section> &sections() {
77  return sections_;
78  }
79  std::vector<std::shared_ptr<Data> > &events() {
80  return events_;
81  }
82 
83 private:
84  std::vector<Section> sections_;
85  std::stack<int> sids_;
86  std::vector<std::shared_ptr<Data> > events_;
87 };
88 
89 
90 class Context;
91 
122 public:
125  event_(nullptr), events_(nullptr) {}
126 
130  ExecutionContext(cl::CommandQueue const &q) :
131  queue_(new cl::CommandQueue(q)),event_(nullptr),events_(nullptr)
132  {
133  }
137  ExecutionContext(cl::CommandQueue const &q,cl::Event *event) :
138  queue_(new cl::CommandQueue(q)),event_(event),events_(nullptr)
139  {
140  }
141 
145  ExecutionContext(cl::CommandQueue const &q,std::vector<cl::Event> *events) :
146  queue_(new cl::CommandQueue(q)),event_(nullptr),events_(events)
147  {
148  }
152  ExecutionContext(cl::CommandQueue const &q,std::vector<cl::Event> *events,cl::Event *event) :
153  queue_(new cl::CommandQueue(q)),event_(event),events_(events)
154  {
155  }
156 
157  bool is_cpu_context() const
158  {
159  return !queue_;
160  }
161 
162  ExecutionContext(ExecutionContext const &) = default;
163  ExecutionContext &operator=(ExecutionContext const &) = default;
164 
165  bool timing_enabled() const
166  {
167  return !!timing_;
168  }
169 
173  void enable_timing(std::shared_ptr<TimingData> p)
174  {
175  timing_ = p;
176  }
177 
185  ExecutionContext generate_series_context(size_t id,size_t total) const
186  {
187  ExecutionContext ctx = generate_series_context_impl(id,total);
188  ctx.timing_ = timing_;
189  return ctx;
190  }
191 
195  void enter(char const *name) const
196  {
197  if(timing_)
198  timing_->enter(name);
199  }
203  void leave() const
204  {
205  if(timing_)
206  timing_->leave();
207  }
208 
209  void finish()
210  {
211  if(queue_)
212  queue_->finish();
213  }
214 
215 
219  cl::CommandQueue &queue() const
220  {
221  DLPRIM_CHECK(queue_);
222  return *queue_;
223  }
224 
230  cl::Event *event(char const *name = "unknown", int id = -1) const
231  {
232  if(timing_ && !timing_->cpu_only) {
233  return &timing_->add_event(name,id,event_)->event;
234  }
235  return event_;
236  }
240  std::vector<cl::Event> *events() const {
241  return events_;
242  }
243 
244 
249  {
250  if(queue_ == nullptr)
251  return ExecutionContext();
252  return ExecutionContext(queue(),events_);
253  }
254 
259  {
260  if(queue_ == nullptr)
261  return ExecutionContext();
262  return ExecutionContext(queue());
263  }
268  {
269  if(queue_ == nullptr)
270  return ExecutionContext();
271  return ExecutionContext(queue(),event_);
272  }
273 
274 private:
275  ExecutionContext generate_series_context_impl(size_t id,size_t total) const
276  {
277  if(total <= 1)
278  return *this;
279  if(id == 0)
280  return first_context();
281  if(id + 1 >= total)
282  return last_context();
283  return middle_context();
284  }
285 
286 
287  std::shared_ptr<TimingData> timing_;
288  std::shared_ptr<cl::CommandQueue> queue_;
289  cl::Event *event_;
290  std::vector<cl::Event> *events_;
291  friend class Context;
292 };
293 
294 
302 class Context {
303 public:
305  enum ContextType {
306  cpu = 0,
307  ocl = 1,
308  };
309 
310 
316  Context(std::string const &dev_id);
321  Context(ContextType dt = cpu,int platform = 0,int device = 0);
325  Context(cl::Context const &c,cl::Platform const &p,cl::Device const &d);
326 
330  Context(ExecutionContext const &ec);
331 
332  Context(Context const &) = default;
333  Context &operator=(Context const &) = default;
334  Context(Context &&) = default;
335  Context &operator=(Context &&) = default;
336  ~Context() {}
337 
342  std::string name() const;
343 
345  ContextType context_type() const;
346 
348  bool is_cpu_context() const
349  {
350  return type_ == cpu;
351  }
353  bool is_opencl_context() const
354  {
355  return type_ == ocl;
356  }
358  cl::Platform &platform()
359  {
360  return platform_;
361  }
363  cl::Device &device()
364  {
365  return device_;
366  }
367 
369  bool check_device_extension(std::string const &name);
370 
372  std::string const &device_extensions();
373 
380  int estimated_core_count();
381 
383  bool is_amd();
385  bool is_nvidia();
387  bool is_intel();
388 
390  cl::Context &context()
391  {
392  return context_;
393  }
395  cl::CommandQueue make_queue(cl_command_queue_properties props=0)
396  {
397  cl::CommandQueue q;
398  if(!is_cpu_context())
399  q=std::move(cl::CommandQueue(context_,device_,props));
400  return q;
401  }
402 
404  ExecutionContext make_execution_context(cl_command_queue_properties props=0)
405  {
406  if(is_cpu_context())
407  return ExecutionContext();
408  else
409  return ExecutionContext(make_queue(props));
410  }
411 
412 private:
413  void select_opencl_device(int p,int d);
414  cl::Platform platform_;
415  cl::Device device_;
416  cl::Context context_;
417  ContextType type_;;
418  std::map<std::string,bool> ext_cache_;
419  std::string ext_;
420 };
421 
422 
423 
424 class ExecGuard {
425 public:
426  ExecGuard(ExecGuard const &) = delete;
427  void operator=(ExecGuard const &) = delete;
428  ExecGuard(ExecutionContext const &ctx,char const *name) : ctx_(&ctx)
429  {
430  ctx_->enter(name);
431  }
432  ~ExecGuard()
433  {
434  ctx_->leave();
435  }
436 private:
437  ExecutionContext const *ctx_;
438 };
439 
440 
441 } // namespace
443 
cl::Device & device()
Get OpenCL device object.
Definition: context.hpp:363
void enter(char const *name) const
Profiling scope enter called by ExecGuard::ExecGuard()
Definition: context.hpp:195
ExecutionContext(cl::CommandQueue const &q)
Create context from cl::CommandQueue, note no events will be waited/signaled.
Definition: context.hpp:130
Definition: context.hpp:424
Definition: context.hpp:22
std::vector< cl::Event > * events() const
Get events to wait for.
Definition: context.hpp:240
ExecutionContext last_context() const
Create context that signals for completion event if needed - use only if you know that more kernels r...
Definition: context.hpp:267
ExecutionContext(cl::CommandQueue const &q, cl::Event *event)
Create a context with a request to signal completion event.
Definition: context.hpp:137
ExecutionContext make_execution_context(cl_command_queue_properties props=0)
Generate ExecutionContext (queue + events)
Definition: context.hpp:404
ContextType
Device used with the context, CPU or OpenCL device.
Definition: context.hpp:305
Definition: context.hpp:31
bool is_opencl_context() const
Returns true if the context was created as OpenCL context.
Definition: context.hpp:353
ExecutionContext(cl::CommandQueue const &q, std::vector< cl::Event > *events)
Create a context with a request to wait for events.
Definition: context.hpp:145
cl::CommandQueue make_queue(cl_command_queue_properties props=0)
Creates a new Command queue for the context with optional properties.
Definition: context.hpp:395
ExecutionContext middle_context() const
Create context that neither waits for nor signals events - use only if you know that more kernels run before and after...
Definition: context.hpp:258
ExecutionContext()
default constructor - can be used for CPU context
Definition: context.hpp:124
This is the main object that represents the pair of OpenCL platform and device; all other objects use it...
Definition: context.hpp:302
Class used for benchmarking of the model.
Definition: context.hpp:16
cl::Event * event(char const *name="unknown", int id=-1) const
Get event to signal. Note: name is used for profiling. If profiling is enabled, the profiling conte...
Definition: context.hpp:230
ExecutionContext first_context() const
Create context that waits for events if needed - use only if you know that more kernels follow...
Definition: context.hpp:248
void leave() const
Profiling scope leave, called by ExecGuard::~ExecGuard()
Definition: context.hpp:203
Main namespace.
Definition: context.hpp:9
bool is_cpu_context() const
Returns true if the context was created as CPU context.
Definition: context.hpp:348
cl::Context & context()
Get OpenCL context object.
Definition: context.hpp:390
ExecutionContext generate_series_context(size_t id, size_t total) const
Create contexts for multiple enqueues.
Definition: context.hpp:185
cl::CommandQueue & queue() const
Get the command queue. Never call it in non-OpenCL context.
Definition: context.hpp:219
ExecutionContext(cl::CommandQueue const &q, std::vector< cl::Event > *events, cl::Event *event)
Create a context with a request to signal completion event and wait for events.
Definition: context.hpp:152
cl::Platform & platform()
Get OpenCL platform object.
Definition: context.hpp:358
void enable_timing(std::shared_ptr< TimingData > p)
Add benchmarking/tracing object data.
Definition: context.hpp:173
This class is used to pass cl::Events that the kernel should wait for and/or signal event completion...
Definition: context.hpp:121