Overte C++ Documentation
Batch.h
1 //
2 // Batch.h
3 // interface/src/gpu
4 //
5 // Created by Sam Gateau on 10/14/2014.
6 // Copyright 2014 High Fidelity, Inc.
7 // Copyright 2024 Overte e.V.
8 //
9 // Distributed under the Apache License, Version 2.0.
10 // See the accompanying file LICENSE or http://www.apache.org/licenses/LICENSE-2.0.html
11 //
12 #ifndef hifi_gpu_Batch_h
13 #define hifi_gpu_Batch_h
14 
15 #include <vector>
16 #include <mutex>
17 #include <functional>
18 #include <glm/gtc/type_ptr.hpp>
19 
20 #include <shared/NsightHelpers.h>
21 
22 #include "Framebuffer.h"
23 #include "Pipeline.h"
24 #include "Query.h"
25 #include "Stream.h"
26 #include "Texture.h"
27 #include "Transform.h"
28 #include "ShaderConstants.h"
29 
30 class QDebug;
31 #define BATCH_PREALLOCATE_MIN 128
32 namespace gpu {
33 
34 // The named batch data provides a mechanism for accumulating data into buffers over the course
35 // of many independent calls. For instance, two objects in the scene might both want to render
36 // a simple box, but are otherwise unaware of each other. The common code that they call to render
37 // the box can create buffers to store the rendering parameters for each box and register a function
38 // that will be called with the accumulated buffer data when the batch commands are finally
39 // executed against the backend
40 
41 
42 class Batch {
43 public:
44  typedef Stream::Slot Slot;
45 
46  enum {
47  // This is tied to RenderMirrorTask::MAX_MIRROR_DEPTH and RenderMirrorTask::MAX_MIRRORS_PER_LEVEL
48  // We have 1 view at mirror depth 0, 3 more at mirror depth 1, 9 more at mirror depth 2, and 27 more at mirror depth 3
49  // For each view, we have one slot for the background and one for the primary view, and that's all repeated for the secondary camera
50  // So this is 2 slots/view/camera * 2 cameras * (1 + 3 + 9 + 27) views
51  MAX_TRANSFORM_SAVE_SLOT_COUNT = 160
52  };
53 
// Per-draw-call record: a 16-bit index plus a second 16-bit word that is
// currently only reserved space. The two members must stay in this order
// and add up to exactly 4 bytes.
class DrawCallInfo {
public:
    using Index = uint16_t;

    // Record carrying only an index; the reserved word is left at zero.
    // Deliberately not explicit, so an Index still converts implicitly.
    DrawCallInfo(Index idx) : DrawCallInfo(idx, 0) {}

    // Record carrying an index and a caller-chosen value for the reserved word.
    DrawCallInfo(Index idx, Index user) : index{ idx }, unused{ user } {}

    Index index{ 0 };
    uint16_t unused{ 0 };  // Reserved space for later
};
65  // Make sure DrawCallInfo has no extra padding
66  static_assert(sizeof(DrawCallInfo) == 4, "DrawCallInfo size is incorrect.");
67 
68  using DrawCallInfoBuffer = std::vector<DrawCallInfo>;
69 
70  struct NamedBatchData {
71  using BufferPointers = std::vector<BufferPointer>;
72  using Function = std::function<void(gpu::Batch&, NamedBatchData&)>;
73 
74  BufferPointers buffers;
75  Function function;
76  DrawCallInfoBuffer drawCallInfos;
77 
78  size_t count() const { return drawCallInfos.size(); }
79 
80  void process(Batch& batch) {
81  if (function) {
82  function(batch, *this);
83  }
84  }
85  };
86 
87  using NamedBatchDataMap = std::map<std::string, NamedBatchData>;
88 
89  DrawCallInfoBuffer _drawCallInfos;
90  static size_t _drawCallInfosMax;
91 
92  mutable std::string _currentNamedCall;
93 
94  const DrawCallInfoBuffer& getDrawCallInfoBuffer() const;
95  DrawCallInfoBuffer& getDrawCallInfoBuffer();
96 
97  void captureDrawCallInfo();
98  void captureNamedDrawCallInfo(std::string name);
99 
100  Batch(const std::string& name = "");
101  // Disallow copy construction and assignment of batches
102  Batch(const Batch& batch) = delete;
103  Batch& operator=(const Batch& batch) = delete;
104  ~Batch();
105 
106  void setName(const std::string& name);
107  const std::string& getName() const { return _name; }
108  void clear();
109 
110  // Batches may need to override the context level stereo settings
111  // if they're performing framebuffer copy operations, like the
112  // deferred lighting resolution mechanism
113  void enableStereo(bool enable = true);
114  bool isStereoEnabled() const;
115 
116  // Stereo batches will pre-translate the view matrix, but this isn't
117  // appropriate for skyboxes or other things intended to be drawn at
118  // infinite distance, so provide a mechanism to render in stereo
119  // without the pre-translation of the view.
120  void enableSkybox(bool enable = true);
121  bool isSkyboxEnabled() const;
122 
123  // Drawcall Uniform value
124  // One 16bit word uniform value is available during the drawcall
125  // its value must be set before each drawcall
126  void setDrawcallUniform(uint16 uniform);
127  // It is reset to the reset value between drawcalls
128  // The reset value is 0 by default and can be changed as a batch state with this call
129  void setDrawcallUniformReset(uint16 resetUniform);
130 
131  // Drawcalls
132  void draw(Primitive primitiveType, uint32 numVertices, uint32 startVertex = 0);
133  void drawIndexed(Primitive primitiveType, uint32 numIndices, uint32 startIndex = 0);
134  void drawInstanced(uint32 numInstances, Primitive primitiveType, uint32 numVertices, uint32 startVertex = 0, uint32 startInstance = 0);
135  void drawIndexedInstanced(uint32 numInstances, Primitive primitiveType, uint32 numIndices, uint32 startIndex = 0, uint32 startInstance = 0);
136  void multiDrawIndirect(uint32 numCommands, Primitive primitiveType);
137  void multiDrawIndexedIndirect(uint32 numCommands, Primitive primitiveType);
138 
139  void setupNamedCalls(const std::string& instanceName, NamedBatchData::Function function);
140  const BufferPointer& getNamedBuffer(const std::string& instanceName, uint8_t index = 0);
141 
142  // Input Stage
143  // InputFormat
144  // InputBuffers
145  // IndexBuffer
146  void setInputFormat(const Stream::FormatPointer& format);
147 
148  void setInputBuffer(Slot channel, const BufferPointer& buffer, Offset offset, Offset stride);
149  void setInputBuffer(Slot channel, const BufferView& buffer); // not a command, just a shortcut from a BufferView
150  void setInputStream(Slot startChannel, const BufferStream& stream); // not a command, just unroll into a loop of setInputBuffer
151 
152  void setIndexBuffer(Type type, const BufferPointer& buffer, Offset offset);
153  void setIndexBuffer(const BufferView& buffer); // not a command, just a shortcut from a BufferView
154 
155  // Indirect buffer is used by the multiDrawXXXIndirect calls
156  // The indirect buffer contains the command descriptions to execute multiple drawcalls in a single call
157  void setIndirectBuffer(const BufferPointer& buffer, Offset offset = 0, Offset stride = 0);
158 
159  // multi command description for multiDrawIndirect
160  class DrawIndirectCommand {
161  public:
162  uint _count { 0 };
163  uint _instanceCount { 0 };
164  uint _firstIndex { 0 };
165  uint _baseInstance { 0 };
166  };
167 
168  // multi command description for multiDrawIndexedIndirect
169  class DrawIndexedIndirectCommand {
170  public:
171  uint _count { 0 };
172  uint _instanceCount { 0 };
173  uint _firstIndex { 0 };
174  uint _baseVertex { 0 };
175  uint _baseInstance { 0 };
176  };
177 
178  // Transform Stage
179  // Vertex position is transformed by ModelTransform from object space to world space
180  // Then by the inverse of the ViewTransform from world space to eye space
181  // finally projected into the clip space by the projection transform
182  // WARNING: ViewTransform transforms from eye space to world space; its inverse is composed
183  // with the ModelTransform to create the equivalent of the gl ModelViewMatrix
184  void setModelTransform(const Transform& model);
185  void setModelTransform(const Transform& model, const Transform& previousModel);
186  void resetViewTransform() { setViewTransform(Transform(), false); }
187  void setViewTransform(const Transform& view, bool camera = true);
188  void setProjectionTransform(const Mat4& proj);
189  void setProjectionJitterEnabled(bool isProjectionEnabled);
190  void setProjectionJitterSequence(const Vec2* sequence, size_t count);
191  void setProjectionJitterScale(float scale);
192  // Very simple 1 level stack management of jitter.
193  void pushProjectionJitterEnabled(bool isProjectionEnabled);
194  void popProjectionJitterEnabled();
195  // Viewport is xy = low left corner in framebuffer, zw = width height of the viewport, expressed in pixels
196  void setViewportTransform(const Vec4i& viewport);
197  void setDepthRangeTransform(float nearDepth, float farDepth);
198 
199  void saveViewProjectionTransform(uint saveSlot);
200  void setSavedViewProjectionTransform(uint saveSlot);
201  void copySavedViewProjectionTransformToBuffer(uint saveSlot, const BufferPointer& buffer, Offset offset);
202 
203  // Pipeline Stage
204  void setPipeline(const PipelinePointer& pipeline);
205 
206  void setStateBlendFactor(const Vec4& factor);
207 
208  // Set the Scissor rect
209  // the rect coordinates are xy for the low left corner of the rect and zw for the width and height of the rect, expressed in pixels
210  void setStateScissorRect(const Vec4i& rect);
211 
212  void setUniformBuffer(uint32 slot, const BufferPointer& buffer, Offset offset, Offset size);
213  void setUniformBuffer(uint32 slot, const BufferView& view); // not a command, just a shortcut from a BufferView
214 
215  void setResourceBuffer(uint32 slot, const BufferPointer& buffer);
216 
217  void setResourceTexture(uint32 slot, const TexturePointer& texture);
218  void setResourceTexture(uint32 slot, const TextureView& view); // not a command, just a shortcut from a TextureView
219  void setResourceTextureTable(const TextureTablePointer& table, uint32 slot = 0);
220  void setResourceFramebufferSwapChainTexture(uint32 slot, const FramebufferSwapChainPointer& framebuffer, unsigned int swapChainIndex, unsigned int renderBufferSlot = 0U); // not a command, just a shortcut from a TextureView
221 
222  // Output Stage
223  void setFramebuffer(const FramebufferPointer& framebuffer);
224  void setFramebufferSwapChain(const FramebufferSwapChainPointer& framebuffer, unsigned int swapChainIndex);
225 
226  void advance(const SwapChainPointer& swapChain);
227 
228  // Clear framebuffer layers
229  // Targets can be any of the render buffers contained in the currently bound Framebuffer
230  // Optionally the scissor test can be enabled locally for this command and to restrict the clearing command to the pixels contained in the scissor rectangle
231  void clearFramebuffer(Framebuffer::Masks targets, const Vec4& color, float depth, int stencil, bool enableScissor = false);
232  void clearColorFramebuffer(Framebuffer::Masks targets, const Vec4& color, bool enableScissor = false); // not a command, just a shortcut for clearFramebuffer, mask out targets to make sure it touches only color targets
233  void clearDepthFramebuffer(float depth, bool enableScissor = false); // not a command, just a shortcut for clearFramebuffer, it touches only depth target
234  void clearStencilFramebuffer(int stencil, bool enableScissor = false); // not a command, just a shortcut for clearFramebuffer, it touches only stencil target
235  void clearDepthStencilFramebuffer(float depth, int stencil, bool enableScissor = false); // not a command, just a shortcut for clearFramebuffer, it touches depth and stencil target
236 
237  // Blit src framebuffer to destination
238  // the srcRect and dstRect are the rect region in source and destination framebuffers expressed in pixel space
239  // with xy and zw the bounding corners of the rect region.
240  void blit(const FramebufferPointer& src, const Vec4i& srcRect, const FramebufferPointer& dst, const Vec4i& dstRect);
241 
242  // Generate the mips for a texture
243  void generateTextureMips(const TexturePointer& texture);
244  // Generate the mips for a texture using the current pipeline
245  void generateTextureMipsWithPipeline(const TexturePointer& destTexture, int numMips = -1);
246 
247  // Query Section
248  void beginQuery(const QueryPointer& query);
249  void endQuery(const QueryPointer& query);
250  void getQuery(const QueryPointer& query);
251 
252  // Reset the stage caches and states
253  void resetStages();
254 
255  void disableContextViewCorrection();
256  void restoreContextViewCorrection();
257  void setContextMirrorViewCorrection(bool shouldMirror);
258 
259  void disableContextStereo();
260  void restoreContextStereo();
261 
262  // Debugging
263  void pushProfileRange(const char* name);
264  void popProfileRange();
265 
266  // TODO: As long as we have gl calls explicitly issued from interface
267  // code, we need to be able to record and batch these calls. The long
268  // term strategy is to get rid of any GL calls in favor of the HIFI GPU API
269  // For now, instead of calling the raw gl Call, use the equivalent call on the batch so the call is being recorded
270  // The implementation of these functions is in GLBackend.cpp
271  void _glUniform1i(int location, int v0);
272  void _glUniform1f(int location, float v0);
273  void _glUniform2f(int location, float v0, float v1);
274  void _glUniform3f(int location, float v0, float v1, float v2);
275  void _glUniform4f(int location, float v0, float v1, float v2, float v3);
276  void _glUniform3fv(int location, int count, const float* value);
277  void _glUniform4fv(int location, int count, const float* value);
278  void _glUniform4iv(int location, int count, const int* value);
279  void _glUniformMatrix3fv(int location, int count, unsigned char transpose, const float* value);
280  void _glUniformMatrix4fv(int location, int count, unsigned char transpose, const float* value);
281 
282  void _glUniform(int location, int v0) {
283  _glUniform1i(location, v0);
284  }
285 
286  void _glUniform(int location, float v0) {
287  _glUniform1f(location, v0);
288  }
289 
290  void _glUniform(int location, const glm::vec2& v) {
291  _glUniform2f(location, v.x, v.y);
292  }
293 
294  void _glUniform(int location, const glm::vec3& v) {
295  _glUniform3f(location, v.x, v.y, v.z);
296  }
297 
298  void _glUniform(int location, const glm::vec4& v) {
299  _glUniform4f(location, v.x, v.y, v.z, v.w);
300  }
301 
302  void _glUniform(int location, const glm::mat3& v) {
303  _glUniformMatrix3fv(location, 1, false, glm::value_ptr(v));
304  }
305 
306  void _glUniform(int location, const glm::mat4& v) {
307  _glUniformMatrix4fv(location, 1, false, glm::value_ptr(v));
308  }
309 
310  // Maybe useful but shouldn't be public. Please convince me otherwise
311  // Well porting to gles i need it...
312  void runLambda(std::function<void()> f);
313 
314  enum Command {
315  COMMAND_draw = 0,
316  COMMAND_drawIndexed,
317  COMMAND_drawInstanced,
318  COMMAND_drawIndexedInstanced,
319  COMMAND_multiDrawIndirect,
320  COMMAND_multiDrawIndexedIndirect,
321 
322  COMMAND_setInputFormat,
323  COMMAND_setInputBuffer,
324  COMMAND_setIndexBuffer,
325  COMMAND_setIndirectBuffer,
326 
327  COMMAND_setModelTransform,
328  COMMAND_setViewTransform,
329  COMMAND_setProjectionTransform,
330  COMMAND_setProjectionJitterEnabled,
331  COMMAND_setProjectionJitterSequence,
332  COMMAND_setProjectionJitterScale,
333  COMMAND_setViewportTransform,
334  COMMAND_setDepthRangeTransform,
335 
336  COMMAND_saveViewProjectionTransform,
337  COMMAND_setSavedViewProjectionTransform,
338  COMMAND_copySavedViewProjectionTransformToBuffer,
339 
340  COMMAND_setPipeline,
341  COMMAND_setStateBlendFactor,
342  COMMAND_setStateScissorRect,
343 
344  COMMAND_setUniformBuffer,
345  COMMAND_setResourceBuffer,
346  COMMAND_setResourceTexture,
347  COMMAND_setResourceTextureTable,
348  COMMAND_setResourceFramebufferSwapChainTexture,
349 
350  COMMAND_setFramebuffer,
351  COMMAND_setFramebufferSwapChain,
352  COMMAND_clearFramebuffer,
353  COMMAND_blit,
354  COMMAND_generateTextureMips,
355  COMMAND_generateTextureMipsWithPipeline,
356 
357  COMMAND_advance,
358 
359  COMMAND_beginQuery,
360  COMMAND_endQuery,
361  COMMAND_getQuery,
362 
363  COMMAND_resetStages,
364 
365  COMMAND_disableContextViewCorrection,
366  COMMAND_restoreContextViewCorrection,
367  COMMAND_setContextMirrorViewCorrection,
368 
369  COMMAND_disableContextStereo,
370  COMMAND_restoreContextStereo,
371 
372  COMMAND_runLambda,
373 
374  COMMAND_startNamedCall,
375  COMMAND_stopNamedCall,
376 
377  // TODO: As long as we have gl calls explicitly issued from interface
378  // code, we need to be able to record and batch these calls. The long
379  // term strategy is to get rid of any GL calls in favor of the HIFI GPU API
380  COMMAND_glUniform1i,
381  COMMAND_glUniform1f,
382  COMMAND_glUniform2f,
383  COMMAND_glUniform3f,
384  COMMAND_glUniform4f,
385  COMMAND_glUniform3fv,
386  COMMAND_glUniform4fv,
387  COMMAND_glUniform4iv,
388  COMMAND_glUniformMatrix3fv,
389  COMMAND_glUniformMatrix4fv,
390 
391  COMMAND_pushProfileRange,
392  COMMAND_popProfileRange,
393 
394  NUM_COMMANDS,
395  };
396  typedef std::vector<Command> Commands;
397  typedef std::vector<size_t> CommandOffsets;
398 
399  const Commands& getCommands() const { return _commands; }
400  const CommandOffsets& getCommandOffsets() const { return _commandOffsets; }
401 
402  class Param {
403  public:
404  union {
405 #if (QT_POINTER_SIZE == 8)
406  size_t _size;
407 #endif
408  int32 _int;
409  uint32 _uint;
410  float _float;
411  char _chars[sizeof(size_t)];
412  };
413 #if (QT_POINTER_SIZE == 8)
414  Param(size_t val) : _size(val) {}
415 #endif
416  Param(int32 val) : _int(val) {}
417  Param(uint32 val) : _uint(val) {}
418  Param(float val) : _float(val) {}
419  };
420  typedef std::vector<Param> Params;
421 
422  const Params& getParams() const { return _params; }
423 
424  // The template cache mechanism for the gpu::Object passed to the gpu::Batch
425  // this allow us to have one cache container for each different types and eventually
426  // be smarter how we manage them
427  template <typename T>
428  class Cache {
429  public:
430  typedef T Data;
431  Data _data;
432  Cache(const Data& data) : _data(data) {}
433  static size_t _max;
434 
435  class Vector {
436  public:
437  std::vector< Cache<T> > _items;
438 
439  Vector() {
440  _items.reserve(_max);
441  }
442 
443  ~Vector() {
444  _max = std::max(_items.size(), _max);
445  }
446 
447 
448  size_t size() const { return _items.size(); }
449  size_t cache(const Data& data) {
450  size_t offset = _items.size();
451  _items.emplace_back(data);
452  return offset;
453  }
454 
455  const Data& get(uint32 offset) const {
456  assert((offset < _items.size()));
457  return (_items.data() + offset)->_data;
458  }
459 
460  void clear() {
461  _items.clear();
462  }
463  };
464  };
465 
466  using CommandHandler = std::function<void(Command, const Param*)>;
467 
468  void forEachCommand(const CommandHandler& handler) const {
469  size_t count = _commands.size();
470  for (size_t i = 0; i < count; ++i) {
471  const auto command = _commands[i];
472  const auto offset = _commandOffsets[i];
473  const Param* params = _params.data() + offset;
474  handler(command, params);
475  }
476  }
477 
478  typedef Cache<BufferPointer>::Vector BufferCaches;
479  typedef Cache<TexturePointer>::Vector TextureCaches;
480  typedef Cache<TextureTablePointer>::Vector TextureTableCaches;
481  typedef Cache<Sampler>::Vector SamplerCaches;
482  typedef Cache<Stream::FormatPointer>::Vector StreamFormatCaches;
483  typedef Cache<Transform>::Vector TransformCaches;
484  typedef Cache<PipelinePointer>::Vector PipelineCaches;
485  typedef Cache<FramebufferPointer>::Vector FramebufferCaches;
486  typedef Cache<SwapChainPointer>::Vector SwapChainCaches;
487  typedef Cache<QueryPointer>::Vector QueryCaches;
488  typedef Cache<std::string>::Vector StringCaches;
489  typedef Cache<std::function<void()>>::Vector LambdaCache;
490 
491  // Cache Data in a byte array if too big to fit in Param
492  // For example Mat4s are going there
493  typedef unsigned char Byte;
494  typedef std::vector<Byte> Bytes;
495  size_t cacheData(size_t size, const void* data);
496  Byte* editData(size_t offset) {
497  if (offset >= _data.size()) {
498  return 0;
499  }
500  return (_data.data() + offset);
501  }
502 
503  const Byte* readData(size_t offset) const {
504  if (offset >= _data.size()) {
505  return 0;
506  }
507  return (_data.data() + offset);
508  }
509 
510  Commands _commands;
511  static size_t _commandsMax;
512 
513  CommandOffsets _commandOffsets;
514  static size_t _commandOffsetsMax;
515 
516  Params _params;
517  static size_t _paramsMax;
518 
519  Bytes _data;
520  static size_t _dataMax;
521 
522 #include "TransformObject_shared.slh"
523 
524  using TransformObjects = std::vector<TransformObject>;
525  bool _invalidModel { true };
526  Transform _currentModel;
527  Transform _previousModel;
528  mutable bool _mustUpdatePreviousModels;
529  mutable TransformObjects _objects;
530  static size_t _objectsMax;
531 
532  Stream::FormatPointer _currentStreamFormat;
533  PipelinePointer _currentPipeline;
534 
535  BufferCaches _buffers;
536  TextureCaches _textures;
537  TextureTableCaches _textureTables;
538  SamplerCaches _samplers;
539  StreamFormatCaches _streamFormats;
540  TransformCaches _transforms;
541  PipelineCaches _pipelines;
542  FramebufferCaches _framebuffers;
543  SwapChainCaches _swapChains;
544  QueryCaches _queries;
545  LambdaCache _lambdas;
546  StringCaches _profileRanges;
547  StringCaches _names;
548 
549  NamedBatchDataMap _namedData;
550 
551  bool _isJitterOnProjectionEnabled { false };
552 
553  uint16_t _drawcallUniform { 0 };
554  uint16_t _drawcallUniformReset { 0 };
555 
556  bool _enableStereo { true };
557  bool _enableSkybox { false };
558 
559 protected:
560  std::string _name;
561 
562  friend class Context;
563  friend class Frame;
564 
565  // Apply all the named calls to the end of the batch
566  // and prepare updates for the render shadow copies of the buffers
567  void finishFrame(BufferUpdates& updates);
568 
569  // Directly copy from the main data to the render thread shadow copy
570  // MUST only be called on the render thread
571  // MUST only be called on batches created on the render thread
572  void flush();
573 
574  void validateDrawState() const;
575 
576  void startNamedCall(const std::string& name);
577  void stopNamedCall();
578 
579 
580 
581  void captureDrawCallInfoImpl();
582 };
583 
584 template <typename T>
585 size_t Batch::Cache<T>::_max = BATCH_PREALLOCATE_MIN;
586 
587 } // namespace gpu
588 
589 #if defined(NSIGHT_FOUND)
590 
591 class ProfileRangeBatch {
592 public:
593  ProfileRangeBatch(gpu::Batch& batch, const char *name);
594  ~ProfileRangeBatch();
595 
596 private:
597  gpu::Batch& _batch;
598 };
599 
600 #define PROFILE_RANGE_BATCH(batch, name) ProfileRangeBatch profileRangeThis(batch, name);
601 
602 #else
603 
604 #define PROFILE_RANGE_BATCH(batch, name)
605 
606 #endif
607 
608 #endif
Provides the Mat4 scripting interface.
Definition: Mat4.h:44