Overte C++ Documentation
GLEscrow.h
1 //
2 // Created by Bradley Austin Davis on 2015/08/06.
3 // Copyright 2015 High Fidelity, Inc.
4 //
5 // Distributed under the Apache License, Version 2.0.
6 // See the accompanying file LICENSE or http://www.apache.org/licenses/LICENSE-2.0.html
7 //
8 
9 #pragma once
10 #ifndef hifi_GLEscrow_h
11 #define hifi_GLEscrow_h
12 
13 #include <utility>
14 #include <algorithm>
15 #include <deque>
16 #include <forward_list>
17 #include <functional>
18 #include <mutex>
19 
20 #include <SharedUtil.h>
21 #include <NumericalConstants.h>
22 
23 #include "Config.h"
24 
25 // The GLEscrow class provides a simple mechanism for producer GL contexts to provide
26 // content to a consumer where the consumer is assumed to be connected to a display and
27 // therefore must never be blocked.
28 //
29 // So we need to accomplish a few things.
30 //
31 // First the producer context needs to be able to supply content to the primary thread
32 // in such a way that the consumer only gets it when it's actually valid for reading
33 // (meaning that the async writing operations have been completed)
34 //
35 // Second, the client thread should be able to release the resource when it's finished
// using it (but again the reading of the resource is likely asynchronous)
37 //
38 // Finally, blocking operations need to be minimal, and any potentially blocking operations
39 // that can't be avoided need to be pushed to the submission context to avoid impacting
40 // the framerate of the consumer
41 //
42 // This class acts as a kind of border guard and holding pen between the two contexts
43 // to hold resources which the CPU is no longer using, but which might still be
44 // in use by the GPU. Fence sync objects are used to moderate the actual release of
45 // resources in either direction.
template <
    typename T
    //,
    //typename = typename std::enable_if<std::is_arithmetic<T>::value, T>::type
>
class GLEscrow {
public:
    // Maximum time an item may sit at the head of the submission queue with an
    // unsignaled fence before cleanTrash() assumes it is stuck and discards it
    // (half a second, in microseconds).
    static const uint64_t MAX_UNSIGNALED_TIME = USECS_PER_SECOND / 2;

    // Sentinel "no resource" value used by the fetch-and-release helpers.
    // Default-constructs T; specialized for GLuint (returns 0) below the class.
    const T& invalid() const {
        static const T INVALID_RESULT;
        return INVALID_RESULT;
    }

    // A resource paired with the fence sync that guards its readiness and the
    // timestamp at which it entered the escrow.
    struct Item {
        const T _value;
        GLsync _sync;
        const uint64_t _created;

        Item(T value, GLsync sync) :
            _value(value), _sync(sync), _created(usecTimestampNow())
        {
        }

        // Microseconds elapsed since this item was created.
        uint64_t age() const {
            return usecTimestampNow() - _created;
        }

        // Non-blocking fence check: true once the GPU has signaled _sync.
        // Uses a zero timeout so it can never stall the calling thread.
        // NOTE(review): GL_WAIT_FAILED is treated as "not signaled", so a
        // failed wait keeps the item queued rather than releasing it early.
        bool signaled() const {
            auto result = glClientWaitSync(_sync, 0, 0);
            if (GL_TIMEOUT_EXPIRED != result && GL_WAIT_FAILED != result) {
                return true;
            }
            return false;
        }
    };

    using Mutex = std::mutex;
    // Callback invoked (outside the lock) to dispose of or repool a resource.
    using Recycler = std::function<void(T t)>;
    // deque gives us random access, double ended push & pop and size, all in constant time
    using Deque = std::deque<Item>;
    using List = std::forward_list<Item>;

    // Installs the recycler used by cleanTrash() for discarded values.
    // NOTE(review): cleanTrash() calls _recycler unconditionally for non-zero
    // values, so a recycler must be set before items start flowing.
    void setRecycler(Recycler recycler) {
        _recycler = recycler;
    }

    // Runs f while holding the escrow mutex (blocking acquire).
    template <typename F>
    void withLock(F f) {
        using Lock = std::unique_lock<Mutex>;
        Lock lock(_mutex);
        f();
    }

    // Runs f only if the mutex can be acquired without blocking.
    // Returns true if f was executed.
    template <typename F>
    bool tryLock(F f) {
        using Lock = std::unique_lock<Mutex>;
        bool result = false;
        Lock lock(_mutex, std::try_to_lock_t());
        if (lock.owns_lock()) {
            f();
            result = true;
        }
        return result;
    }


    // Number of items currently waiting in the submission queue.
    size_t depth() {
        size_t result{ 0 };
        withLock([&]{
            result = _submits.size();
        });
        return result;
    }

    // Submit a new resource from the producer context
    // returns the number of prior submissions that were
    // never consumed before becoming available.
    // producers should self-limit if they start producing more
    // work than is being consumed;
    size_t submit(T t, GLsync writeSync = 0) {
        if (!writeSync) {
            // FIXME should the release and submit actually force the creation of a fence?
            writeSync = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
            // Flush so the fence (and preceding commands) are actually
            // dispatched to the GPU; otherwise it might never signal.
            glFlush();
        }

        withLock([&]{
            _submits.push_back(Item(t, writeSync));
        });
        return cleanTrash();
    }

    // Returns the next available resource provided by the submitter,
    // or if none is available (which could mean either the submission
    // list is empty or that the first item on the list isn't yet signaled)
    // Deprecated... will inject an unnecessary GPU bubble
    bool fetchSignaled(T& t) {
        bool result = false;
        // On the one hand using try_lock() reduces the chance of blocking the consumer thread,
        // but if the producer thread is going fast enough, it could effectively
        // starve the consumer out of ever actually getting resources.
        tryLock([&] {
            // May be called on any thread, but must be inside a locked section
            if (signaled(_submits, 0)) {
                result = true;
                t = _submits.at(0)._value;
                _submits.pop_front();
            }
        });
        return result;
    }

    // Populates t with the next available resource provided by the submitter
    // and sync with a fence that will be signaled when all write commands for the
    // item have completed. Returns false if no resources are available.
    // The caller takes ownership of sync and must glDeleteSync it.
    bool fetchWithFence(T& t, GLsync& sync) {
        bool result = false;
        // On the one hand using try_lock() reduces the chance of blocking the consumer thread,
        // but if the producer thread is going fast enough, it could effectively
        // starve the consumer out of ever actually getting resources.
        tryLock([&] {
            if (!_submits.empty()) {
                result = true;
                // When fetching with sync, we only want the latest item
                auto item = _submits.back();
                _submits.pop_back();

                // Throw everything else in the trash; these submissions were
                // produced faster than they could be consumed.
                for (const auto& oldItem : _submits) {
                    _trash.push_front(oldItem);
                }
                _submits.clear();

                t = item._value;
                sync = item._sync;
            }
        });
        return result;
    }

    // Like fetchWithFence, but instead of handing the fence to the caller,
    // inserts a server-side wait on this context so subsequent GL commands
    // order correctly after the producer's writes. Never blocks the CPU.
    bool fetchWithGpuWait(T& t) {
        GLsync sync { 0 };
        if (fetchWithFence(t, sync)) {
            // Texture was updated, inject a wait into the GL command stream to ensure
            // commands on this context until the commands to generate t are finished.
            if (sync != 0) {
                glWaitSync(sync, 0, GL_TIMEOUT_IGNORED);
                glDeleteSync(sync);
            }
            return true;
        }
        return false;
    }

    // Returns the next available resource provided by the submitter,
    // or if none is available (which could mean either the submission
    // list is empty or that the first item on the list isn't yet signaled)
    // Also releases any previous texture held by the caller
    bool fetchSignaledAndRelease(T& value) {
        T originalValue = value;
        if (fetchSignaled(value)) {
            // Only give back the old value if it was a real resource.
            if (originalValue != invalid()) {
                release(originalValue);
            }
            return true;
        }
        return false;
    }

    // fetchWithFence variant that also returns the caller's previous resource
    // to the escrow on a successful fetch.
    bool fetchAndReleaseWithFence(T& value, GLsync& sync) {
        T originalValue = value;
        if (fetchWithFence(value, sync)) {
            if (originalValue != invalid()) {
                release(originalValue);
            }
            return true;
        }
        return false;
    }

    // fetchWithGpuWait variant that also returns the caller's previous
    // resource to the escrow on a successful fetch.
    bool fetchAndReleaseWithGpuWait(T& value) {
        T originalValue = value;
        if (fetchWithGpuWait(value)) {
            if (originalValue != invalid()) {
                release(originalValue);
            }
            return true;
        }
        return false;
    }

    // If fetch returns a non-zero value, it's the responsibility of the
    // client to release it at some point
    void release(const T& t, GLsync readSync = 0) {
        if (!readSync) {
            // FIXME should the release and submit actually force the creation of a fence?
            readSync = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
            // Ensure the fence is dispatched so it can eventually signal.
            glFlush();
        }

        withLock([&]{
            _releases.push_back(Item(t, readSync));
        });
    }

private:
    // Prunes stale/consumed items and recycles their resources outside the
    // lock. Returns the number of submissions that became available but were
    // never consumed (wasted producer work).
    size_t cleanTrash() {
        size_t wastedWork{ 0 };
        List trash;
        tryLock([&]{
            // Discard head items whose fences have gone unsignaled for too
            // long (likely stuck); stop at the first healthy item.
            while (!_submits.empty()) {
                const auto& item = _submits.front();
                if (!item._sync || item.age() < MAX_UNSIGNALED_TIME) {
                    break;
                }
                qWarning() << "Long unsignaled sync " << item._sync << " unsignaled for " << item.age();
                _trash.push_front(item);
                _submits.pop_front();
            }

            // We only ever need one ready item available in the list, so if the
            // second item is signaled (implying the first is as well), remove the
            // first item. Iterate until the SECOND item in the list is not in the
            // ready state. The signaled function takes care of checking against
            // the deque size.
            while (signaled(_submits, 1)) {
                _trash.push_front(_submits.front());
                _submits.pop_front();
                ++wastedWork;
            }

            // Stuff in the release queue can be cleared out as soon as it's signaled
            while (signaled(_releases, 0)) {
                _trash.push_front(_releases.front());
                _releases.pop_front();
            }

            // Take ownership of the trash so the GL/recycler work below
            // happens outside the locked section.
            trash.swap(_trash);
        });

        // FIXME maybe doing a timing on the deleters and warn if it's taking excessive time?
        // although we are out of the lock, so it shouldn't be blocking anything
        std::for_each(trash.begin(), trash.end(), [&](typename List::const_reference item) {
            if (item._value) {
                _recycler(item._value);
            }
            if (item._sync) {
                glDeleteSync(item._sync);
            }
        });
        return wastedWork;
    }

    // True if deque has an item at index i whose fence is (or was) signaled.
    // May be called on any thread, but must be inside a locked section.
    bool signaled(Deque& deque, size_t i) {
        if (i >= deque.size()) {
            return false;
        }

        auto& item = deque.at(i);
        // If there's no sync object, either it's not required or it's already been found to be signaled
        if (!item._sync) {
            return true;
        }

        // Check the sync value using a zero timeout to ensure we don't block
        // This is critically important as this is the only GL function we'll call
        // inside the locked sections, so it cannot have any latency
        if (item.signaled()) {
            // if the sync is signaled, queue it for deletion
            _trash.push_front(Item(invalid(), item._sync));
            // And change the stored value to 0 so we don't check it again
            item._sync = 0;
            return true;
        }

        return false;
    }

    Mutex _mutex;
    Recycler _recycler;
    // Items coming from the submission / writer context
    Deque _submits;
    // Items coming from the client context.
    Deque _releases;
    // Items which are no longer in use.
    List _trash;
};
335 
336 template<>
337 inline const GLuint& GLEscrow<GLuint>::invalid() const {
338  static const GLuint INVALID_RESULT { 0 };
339  return INVALID_RESULT;
340 }
341 
342 using GLTextureEscrow = GLEscrow<GLuint>;
343 
344 #endif
345