Overte C++ Documentation
Batch.h
1 //
2 // Batch.h
3 // interface/src/gpu
4 //
5 // Created by Sam Gateau on 10/14/2014.
6 // Copyright 2014 High Fidelity, Inc.
7 //
8 // Distributed under the Apache License, Version 2.0.
9 // See the accompanying file LICENSE or http://www.apache.org/licenses/LICENSE-2.0.html
10 //
11 #ifndef hifi_gpu_Batch_h
12 #define hifi_gpu_Batch_h
13 
14 #include <vector>
15 #include <mutex>
16 #include <functional>
17 #include <glm/gtc/type_ptr.hpp>
18 
19 #include <shared/NsightHelpers.h>
20 
21 #include "Framebuffer.h"
22 #include "Pipeline.h"
23 #include "Query.h"
24 #include "Stream.h"
25 #include "Texture.h"
26 #include "Transform.h"
27 #include "ShaderConstants.h"
28 
29 class QDebug;
30 #define BATCH_PREALLOCATE_MIN 128
31 namespace gpu {
32 
33 // The named batch data provides a mechanism for accumulating data into buffers over the course
34 // of many independent calls. For instance, two objects in the scene might both want to render
35 // a simple box, but are otherwise unaware of each other. The common code that they call to render
36 // the box can create buffers to store the rendering parameters for each box and register a function
37 // that will be called with the accumulated buffer data when the batch commands are finally
38 // executed against the backend
39 
40 
41 class Batch {
42 public:
43  typedef Stream::Slot Slot;
44 
45 
46  class DrawCallInfo { // Element type of DrawCallInfoBuffer: a 16-bit index plus a 16-bit reserved word
47  public:
48  using Index = uint16_t;
49  // Note: the single-argument constructor is not explicit, so an Index converts implicitly to a DrawCallInfo
50  DrawCallInfo(Index idx) : index(idx) {}
51  DrawCallInfo(Index idx, Index user) : index(idx), unused(user) {} // 'user' is stored in the reserved word
52 
53  Index index { 0 };
54  uint16_t unused { 0 }; // Reserved space for later
55 
56  };
57  // Make sure DrawCallInfo has no extra padding
58  static_assert(sizeof(DrawCallInfo) == 4, "DrawCallInfo size is incorrect.");
59 
60  using DrawCallInfoBuffer = std::vector<DrawCallInfo>;
61 
62  struct NamedBatchData { // Data accumulated under one name: buffers, a callback, and the draw call infos captured for it
63  using BufferPointers = std::vector<BufferPointer>;
64  using Function = std::function<void(gpu::Batch&, NamedBatchData&)>;
65 
66  BufferPointers buffers;
67  Function function; // may be empty; process() checks before calling it
68  DrawCallInfoBuffer drawCallInfos;
69 
70  size_t count() const { return drawCallInfos.size(); } // number of draw call infos held in drawCallInfos
71  // Invoke the registered callback, if any, handing it the batch and this accumulated data
72  void process(Batch& batch) {
73  if (function) {
74  function(batch, *this);
75  }
76  }
77  };
78 
79  using NamedBatchDataMap = std::map<std::string, NamedBatchData>;
80 
81  DrawCallInfoBuffer _drawCallInfos;
82  static size_t _drawCallInfosMax;
83 
84  mutable std::string _currentNamedCall;
85 
86  const DrawCallInfoBuffer& getDrawCallInfoBuffer() const;
87  DrawCallInfoBuffer& getDrawCallInfoBuffer();
88 
89  void captureDrawCallInfo();
90  void captureNamedDrawCallInfo(std::string name);
91 
92  Batch(const std::string& name = "");
93  // Disallow copy construction and assignment of batches
94  Batch(const Batch& batch) = delete;
95  Batch& operator=(const Batch& batch) = delete;
96  ~Batch();
97 
98  void setName(const std::string& name);
99  const std::string& getName() const { return _name; }
100  void clear();
101 
102  // Batches may need to override the context level stereo settings
103  // if they're performing framebuffer copy operations, like the
104  // deferred lighting resolution mechanism
105  void enableStereo(bool enable = true);
106  bool isStereoEnabled() const;
107 
108  // Stereo batches will pre-translate the view matrix, but this isn't
109  // appropriate for skyboxes or other things intended to be drawn at
110  // infinite distance, so provide a mechanism to render in stereo
111  // without the pre-translation of the view.
112  void enableSkybox(bool enable = true);
113  bool isSkyboxEnabled() const;
114 
115  // Drawcall Uniform value
116  // One 16bit word uniform value is available during the drawcall
117  // its value must be set before each drawcall
118  void setDrawcallUniform(uint16 uniform);
119  // It is reset to the reset value between each drawcalls
120  // The reset value is 0 by default and can be changed as a batch state with this call
121  void setDrawcallUniformReset(uint16 resetUniform);
122 
123  // Drawcalls
124  void draw(Primitive primitiveType, uint32 numVertices, uint32 startVertex = 0);
125  void drawIndexed(Primitive primitiveType, uint32 numIndices, uint32 startIndex = 0);
126  void drawInstanced(uint32 numInstances, Primitive primitiveType, uint32 numVertices, uint32 startVertex = 0, uint32 startInstance = 0);
127  void drawIndexedInstanced(uint32 numInstances, Primitive primitiveType, uint32 numIndices, uint32 startIndex = 0, uint32 startInstance = 0);
128  void multiDrawIndirect(uint32 numCommands, Primitive primitiveType);
129  void multiDrawIndexedIndirect(uint32 numCommands, Primitive primitiveType);
130 
131  void setupNamedCalls(const std::string& instanceName, NamedBatchData::Function function);
132  const BufferPointer& getNamedBuffer(const std::string& instanceName, uint8_t index = 0);
133 
134  // Input Stage
135  // InputFormat
136  // InputBuffers
137  // IndexBuffer
138  void setInputFormat(const Stream::FormatPointer& format);
139 
140  void setInputBuffer(Slot channel, const BufferPointer& buffer, Offset offset, Offset stride);
141  void setInputBuffer(Slot channel, const BufferView& buffer); // not a command, just a shortcut from a BufferView
142  void setInputStream(Slot startChannel, const BufferStream& stream); // not a command, just unroll into a loop of setInputBuffer
143 
144  void setIndexBuffer(Type type, const BufferPointer& buffer, Offset offset);
145  void setIndexBuffer(const BufferView& buffer); // not a command, just a shortcut from a BufferView
146 
147  // Indirect buffer is used by the multiDrawXXXIndirect calls
148  // The indirect buffer contains the command descriptions to execute multiple drawcalls in a single call
149  void setIndirectBuffer(const BufferPointer& buffer, Offset offset = 0, Offset stride = 0);
150 
151  // multi command description for multiDrawIndirect (presumably; the indexed variant is DrawIndexedIndirectCommand)
152  class DrawIndirectCommand {
153  public:
154  uint _count{ 0 };
155  uint _instanceCount{ 0 };
156  uint _firstIndex{ 0 };
157  uint _baseInstance{ 0 };
158  };
159 
160  // multi command description for multiDrawIndexedIndirect
161  class DrawIndexedIndirectCommand {
162  public:
163  uint _count{ 0 };
164  uint _instanceCount{ 0 };
165  uint _firstIndex{ 0 };
166  uint _baseVertex{ 0 };
167  uint _baseInstance{ 0 };
168  };
169 
170  // Transform Stage
171  // Vertex position is transformed by ModelTransform from object space to world space
172  // Then by the inverse of the ViewTransform from world space to eye space
173  // finally projected into the clip space by the projection transform
174  // WARNING: ViewTransform transforms from eye space to world space; its inverse is composed
175  // with the ModelTransform to create the equivalent of the gl ModelViewMatrix
176  void setModelTransform(const Transform& model);
177  void resetViewTransform() { setViewTransform(Transform(), false); }
178  void setViewTransform(const Transform& view, bool camera = true);
179  void setProjectionTransform(const Mat4& proj);
180  void setProjectionJitter(float jx = 0.0f, float jy = 0.0f);
181  // Very simple 1 level stack management of jitter.
182  void pushProjectionJitter(float jx = 0.0f, float jy = 0.0f);
183  void popProjectionJitter();
184  // Viewport is xy = low left corner in framebuffer, zw = width height of the viewport, expressed in pixels
185  void setViewportTransform(const Vec4i& viewport);
186  void setDepthRangeTransform(float nearDepth, float farDepth);
187 
188  // Pipeline Stage
189  void setPipeline(const PipelinePointer& pipeline);
190 
191  void setStateBlendFactor(const Vec4& factor);
192 
193  // Set the Scissor rect
194  // the rect coordinates are xy for the low left corner of the rect and zw for the width and height of the rect, expressed in pixels
195  void setStateScissorRect(const Vec4i& rect);
196 
197  void setUniformBuffer(uint32 slot, const BufferPointer& buffer, Offset offset, Offset size);
198  void setUniformBuffer(uint32 slot, const BufferView& view); // not a command, just a shortcut from a BufferView
199 
200  void setResourceBuffer(uint32 slot, const BufferPointer& buffer);
201 
202  void setResourceTexture(uint32 slot, const TexturePointer& texture);
203  void setResourceTexture(uint32 slot, const TextureView& view); // not a command, just a shortcut from a TextureView
204  void setResourceTextureTable(const TextureTablePointer& table, uint32 slot = 0);
205  void setResourceFramebufferSwapChainTexture(uint32 slot, const FramebufferSwapChainPointer& framebuffer, unsigned int swpaChainIndex, unsigned int renderBufferSlot = 0U); // not a command, just a shortcut from a TextureView
206 
207  // Output Stage
208  void setFramebuffer(const FramebufferPointer& framebuffer);
209  void setFramebufferSwapChain(const FramebufferSwapChainPointer& framebuffer, unsigned int swapChainIndex);
210 
211  void advance(const SwapChainPointer& swapChain);
212 
213  // Clear framebuffer layers
214  // Targets can be any of the render buffers contained in the currently bound Framebuffer
215  // Optionally the scissor test can be enabled locally for this command and to restrict the clearing command to the pixels contained in the scissor rectangle
216  void clearFramebuffer(Framebuffer::Masks targets, const Vec4& color, float depth, int stencil, bool enableScissor = false);
217  void clearColorFramebuffer(Framebuffer::Masks targets, const Vec4& color, bool enableScissor = false); // not a command, just a shortcut for clearFramebuffer, mask out targets to make sure it touches only color targets
218  void clearDepthFramebuffer(float depth, bool enableScissor = false); // not a command, just a shortcut for clearFramebuffer, it touches only depth target
219  void clearStencilFramebuffer(int stencil, bool enableScissor = false); // not a command, just a shortcut for clearFramebuffer, it touches only stencil target
220  void clearDepthStencilFramebuffer(float depth, int stencil, bool enableScissor = false); // not a command, just a shortcut for clearFramebuffer, it touches depth and stencil target
221 
222  // Blit src framebuffer to destination
223  // the srcRect and dstRect are the rect region in source and destination framebuffers expressed in pixel space
224  // with xy and zw the bounding corners of the rect region.
225  void blit(const FramebufferPointer& src, const Vec4i& srcRect, const FramebufferPointer& dst, const Vec4i& dstRect);
226 
227  // Generate the mips for a texture
228  void generateTextureMips(const TexturePointer& texture);
229  // Generate the mips for a texture using the current pipeline
230  void generateTextureMipsWithPipeline(const TexturePointer& destTexture, int numMips = -1);
231 
232  // Query Section
233  void beginQuery(const QueryPointer& query);
234  void endQuery(const QueryPointer& query);
235  void getQuery(const QueryPointer& query);
236 
237  // Reset the stage caches and states
238  void resetStages();
239 
240  void disableContextViewCorrection();
241  void restoreContextViewCorrection();
242 
243  void disableContextStereo();
244  void restoreContextStereo();
245 
246  // Debugging
247  void pushProfileRange(const char* name);
248  void popProfileRange();
249 
250  // TODO: As long as we have gl calls explicitly issued from interface
251  // code, we need to be able to record and batch these calls. The long
252  // term strategy is to get rid of any GL calls in favor of the HIFI GPU API
253  // For now, instead of calling the raw gl call, use the equivalent call on the batch so the call is recorded
254  // The implementation of these functions is in GLBackend.cpp
255  void _glUniform1i(int location, int v0);
256  void _glUniform1f(int location, float v0);
257  void _glUniform2f(int location, float v0, float v1);
258  void _glUniform3f(int location, float v0, float v1, float v2);
259  void _glUniform4f(int location, float v0, float v1, float v2, float v3);
260  void _glUniform3fv(int location, int count, const float* value);
261  void _glUniform4fv(int location, int count, const float* value);
262  void _glUniform4iv(int location, int count, const int* value);
263  void _glUniformMatrix3fv(int location, int count, unsigned char transpose, const float* value);
264  void _glUniformMatrix4fv(int location, int count, unsigned char transpose, const float* value);
265 
266  void _glUniform(int location, int v0) { // Overload set: each overload forwards to the _glUniform* call matching its value type
267  _glUniform1i(location, v0);
268  }
269 
270  void _glUniform(int location, float v0) {
271  _glUniform1f(location, v0);
272  }
273 
274  void _glUniform(int location, const glm::vec2& v) {
275  _glUniform2f(location, v.x, v.y);
276  }
277 
278  void _glUniform(int location, const glm::vec3& v) {
279  _glUniform3f(location, v.x, v.y, v.z);
280  }
281 
282  void _glUniform(int location, const glm::vec4& v) {
283  _glUniform4f(location, v.x, v.y, v.z, v.w);
284  }
285 
286  void _glUniform(int location, const glm::mat3& v) {
287  _glUniformMatrix3fv(location, 1, false, glm::value_ptr(v)); // count = 1 matrix, transpose = false
288  }
289 
290  // Maybe useful but shouldn't be public. Please convince me otherwise
291  // Well porting to gles i need it...
292  void runLambda(std::function<void()> f);
293 
294  enum Command { // Identifier for each kind of command a Batch holds in _commands; enumerators mirror the Batch method names
295  COMMAND_draw = 0,
296  COMMAND_drawIndexed,
297  COMMAND_drawInstanced,
298  COMMAND_drawIndexedInstanced,
299  COMMAND_multiDrawIndirect,
300  COMMAND_multiDrawIndexedIndirect,
301 
302  COMMAND_setInputFormat,
303  COMMAND_setInputBuffer,
304  COMMAND_setIndexBuffer,
305  COMMAND_setIndirectBuffer,
306 
307  COMMAND_setModelTransform,
308  COMMAND_setViewTransform,
309  COMMAND_setProjectionTransform,
310  COMMAND_setProjectionJitter,
311  COMMAND_setViewportTransform,
312  COMMAND_setDepthRangeTransform,
313 
314  COMMAND_setPipeline,
315  COMMAND_setStateBlendFactor,
316  COMMAND_setStateScissorRect,
317 
318  COMMAND_setUniformBuffer,
319  COMMAND_setResourceBuffer,
320  COMMAND_setResourceTexture,
321  COMMAND_setResourceTextureTable,
322  COMMAND_setResourceFramebufferSwapChainTexture,
323 
324  COMMAND_setFramebuffer,
325  COMMAND_setFramebufferSwapChain,
326  COMMAND_clearFramebuffer,
327  COMMAND_blit,
328  COMMAND_generateTextureMips,
329  COMMAND_generateTextureMipsWithPipeline,
330 
331  COMMAND_advance,
332 
333  COMMAND_beginQuery,
334  COMMAND_endQuery,
335  COMMAND_getQuery,
336 
337  COMMAND_resetStages,
338 
339  COMMAND_disableContextViewCorrection,
340  COMMAND_restoreContextViewCorrection,
341 
342  COMMAND_disableContextStereo,
343  COMMAND_restoreContextStereo,
344 
345  COMMAND_runLambda,
346 
347  COMMAND_startNamedCall,
348  COMMAND_stopNamedCall,
349 
350  // TODO: As long as we have gl calls explicitly issued from interface
351  // code, we need to be able to record and batch these calls. The long
352  // term strategy is to get rid of any GL calls in favor of the HIFI GPU API
353  COMMAND_glUniform1i,
354  COMMAND_glUniform1f,
355  COMMAND_glUniform2f,
356  COMMAND_glUniform3f,
357  COMMAND_glUniform4f,
358  COMMAND_glUniform3fv,
359  COMMAND_glUniform4fv,
360  COMMAND_glUniform4iv,
361  COMMAND_glUniformMatrix3fv,
362  COMMAND_glUniformMatrix4fv,
363 
364  COMMAND_pushProfileRange,
365  COMMAND_popProfileRange,
366 
367  NUM_COMMANDS, // count sentinel: equals the number of enumerators above, so it must stay last
368  };
369  typedef std::vector<Command> Commands;
370  typedef std::vector<size_t> CommandOffsets;
371 
372  const Commands& getCommands() const { return _commands; }
373  const CommandOffsets& getCommandOffsets() const { return _commandOffsets; }
374 
375  class Param { // One untyped parameter slot; the union member written is chosen by the constructor overload used
376  public:
377  union {
378 #if (QT_POINTER_SIZE == 8)
379  size_t _size; // only available when QT_POINTER_SIZE == 8
380 #endif
381  int32 _int;
382  uint32 _uint;
383  float _float;
384  char _chars[sizeof(size_t)]; // keeps the union at least sizeof(size_t) bytes even when _size is compiled out
385  };
386 #if (QT_POINTER_SIZE == 8)
387  Param(size_t val) : _size(val) {}
388 #endif
389  Param(int32 val) : _int(val) {} // the constructors below write only their own member; remaining bytes are left unset
390  Param(uint32 val) : _uint(val) {}
391  Param(float val) : _float(val) {}
392  };
393  typedef std::vector<Param> Params;
394 
395  const Params& getParams() const { return _params; }
396 
397  // The template cache mechanism for the gpu::Object passed to the gpu::Batch
398  // this allow us to have one cache container for each different types and eventually
399  // be smarter how we manage them
400  template <typename T>
401  class Cache { // Wraps one cached value of type T; Cache<T>::Vector is the per-type container of such values
402  public:
403  typedef T Data;
404  Data _data;
405  Cache(const Data& data) : _data(data) {} // stores a copy of 'data'
406  static size_t _max; // reserve size for new Vectors of this T; raised by ~Vector(); plain static, no synchronization here
407 
408  class Vector {
409  public:
410  std::vector< Cache<T> > _items;
411  // Pre-size the storage to _max, which ~Vector() keeps at the largest item count seen so far
412  Vector() {
413  _items.reserve(_max);
414  }
415  // Record this vector's final item count so later Vectors reserve at least that much
416  ~Vector() {
417  _max = std::max(_items.size(), _max);
418  }
419 
420 
421  size_t size() const { return _items.size(); }
422  size_t cache(const Data& data) { // append a copy of 'data' and return the index it was stored at
423  size_t offset = _items.size();
424  _items.emplace_back(data);
425  return offset;
426  }
427  // Return the value stored at index 'offset'; the range is checked by assert only
428  const Data& get(uint32 offset) const {
429  assert((offset < _items.size()));
430  return (_items.data() + offset)->_data;
431  }
432 
433  void clear() {
434  _items.clear();
435  }
436  };
437  };
438 
439  using CommandHandler = std::function<void(Command, const Param*)>;
440 
441  void forEachCommand(const CommandHandler& handler) const { // Call 'handler' once per command, in _commands order
442  size_t count = _commands.size();
443  for (size_t i = 0; i < count; ++i) {
444  const auto command = _commands[i];
445  const auto offset = _commandOffsets[i]; // _commandOffsets is indexed in parallel with _commands (no bounds check here)
446  const Param* params = _params.data() + offset; // pointer to the Param at index 'offset' in _params
447  handler(command, params);
448  }
449  }
450 
451  typedef Cache<BufferPointer>::Vector BufferCaches;
452  typedef Cache<TexturePointer>::Vector TextureCaches;
453  typedef Cache<TextureTablePointer>::Vector TextureTableCaches;
454  typedef Cache<Stream::FormatPointer>::Vector StreamFormatCaches;
455  typedef Cache<Transform>::Vector TransformCaches;
456  typedef Cache<PipelinePointer>::Vector PipelineCaches;
457  typedef Cache<FramebufferPointer>::Vector FramebufferCaches;
458  typedef Cache<SwapChainPointer>::Vector SwapChainCaches;
459  typedef Cache<QueryPointer>::Vector QueryCaches;
460  typedef Cache<std::string>::Vector StringCaches;
461  typedef Cache<std::function<void()>>::Vector LambdaCache;
462 
463  // Cache Data in a byte array if too big to fit in Param
464  // For example, Mat4s are stored there
465  typedef unsigned char Byte;
466  typedef std::vector<Byte> Bytes;
467  size_t cacheData(size_t size, const void* data);
468  Byte* editData(size_t offset) { // Mutable pointer to the byte at 'offset' in _data, or nullptr if out of range
469  if (offset >= _data.size()) {
470  return nullptr; // also covers an empty _data: there is no valid byte to point at
471  }
472  return (_data.data() + offset);
473  }
474 
475  const Byte* readData(size_t offset) const { // Read-only pointer to the byte at 'offset' in _data, or nullptr if out of range
476  if (offset >= _data.size()) {
477  return nullptr; // also covers an empty _data: there is no valid byte to point at
478  }
479  return (_data.data() + offset);
480  }
481 
482  Commands _commands;
483  static size_t _commandsMax;
484 
485  CommandOffsets _commandOffsets;
486  static size_t _commandOffsetsMax;
487 
488  Params _params;
489  static size_t _paramsMax;
490 
491  Bytes _data;
492  static size_t _dataMax;
493 
494  // SSBO class... layout MUST match the layout in Transform.slh
495  class TransformObject { // Keep the member order and types unchanged: the layout must match Transform.slh
496  public:
497  Mat4 _model;
498  Mat4 _modelInverse; // presumably the inverse of _model; it is filled in outside this header (TODO confirm)
499  };
500 
501  using TransformObjects = std::vector<TransformObject>;
502  bool _invalidModel { true };
503  Transform _currentModel;
504  TransformObjects _objects;
505  static size_t _objectsMax;
506 
507  BufferCaches _buffers;
508  TextureCaches _textures;
509  TextureTableCaches _textureTables;
510  StreamFormatCaches _streamFormats;
511  TransformCaches _transforms;
512  PipelineCaches _pipelines;
513  FramebufferCaches _framebuffers;
514  SwapChainCaches _swapChains;
515  QueryCaches _queries;
516  LambdaCache _lambdas;
517  StringCaches _profileRanges;
518  StringCaches _names;
519 
520  NamedBatchDataMap _namedData;
521 
522  uint16_t _drawcallUniform{ 0 };
523  uint16_t _drawcallUniformReset{ 0 };
524 
525  glm::vec2 _projectionJitter{ 0.0f, 0.0f };
526  bool _enableStereo{ true };
527  bool _enableSkybox { false };
528 
529 protected:
530  std::string _name;
531 
532  friend class Context;
533  friend class Frame;
534 
535  // Apply all the named calls to the end of the batch
536  // and prepare updates for the render shadow copies of the buffers
537  void finishFrame(BufferUpdates& updates);
538 
539  // Directly copy from the main data to the render thread shadow copy
540  // MUST only be called on the render thread
541  // MUST only be called on batches created on the render thread
542  void flush();
543 
544  void startNamedCall(const std::string& name);
545  void stopNamedCall();
546 
547 
548 
549  void captureDrawCallInfoImpl();
550 };
551 
552 template <typename T>
553 size_t Batch::Cache<T>::_max = BATCH_PREALLOCATE_MIN; // every Cache<T> instantiation starts from the same minimum reserve size (128)
554 
555 }
556 
557 #if defined(NSIGHT_FOUND)
558 
559 class ProfileRangeBatch { // Scoped helper instantiated by PROFILE_RANGE_BATCH; only built when NSIGHT_FOUND is defined
560 public:
561  ProfileRangeBatch(gpu::Batch& batch, const char *name); // defined elsewhere; presumably calls batch.pushProfileRange(name) (TODO confirm)
562  ~ProfileRangeBatch(); // defined elsewhere; presumably calls popProfileRange() on the same batch (TODO confirm)
563 
564 private:
565  gpu::Batch& _batch; // held by reference, so the batch must outlive this object
566 };
567 
568 #define PROFILE_RANGE_BATCH(batch, name) ProfileRangeBatch profileRangeThis(batch, name);
569 
570 #else
571 
572 #define PROFILE_RANGE_BATCH(batch, name)
573 
574 #endif
575 
576 #endif
Provides the Mat4 scripting interface.
Definition: Mat4.h:44