/** * Structure used as indirect drawcall source. * Is also used to pass some per draw informations using the same buffer. */ struct DrawCommand { /* Set by drawcall. Untouched */ uint vert_count; /* Computed on GPU by GPU occlusion test. Is drw_instance_count * engine_instance_count. */ uint instance_count; /* Set by drawcall. Untouched */ uint vert_first; /* Set by drawcall. Untouched */ uint index_base; /* Set by drawcall. Untouched */ uint instance_base; /* Instance count after merging consecutive drawcalls OR set by drawcall. */ uint drw_instance_count; /* Generated instances count after which we increment the resource id. */ uint engine_instance_count; /* Resource ID of the first instance. */ uint drw_resource_id; }; /** * This handle contains all possible split of the drawcall batching algorithm. * We sort in this order : Command Group > GPUBatch > Command Index. * * | CmdGrp1 | CmdGrp2 | * | GPUBatch1 | GPUBatch2 | GPUBatch1 | * | Cmd1 + Cmd3 + Cmd5 | Cmd2 | Cmd4 + Cmd6 | * * As you can see, even if command are not executed in order, they are always in increasing order * inside a GPUBatch run. This is because we want to keep drw_resource_id in increasing order to be * able to batch consecutive commands together. Since the drw_resource_id is always increasing for * each new object going through the Resource ID process, grouping by the draw command ID which is * also increasing has the same effect. * * In the example above, we will issue 3 multi draw indirect calls. * * NOTE: Some commands cannot be re-ordered (i.e: clear, barriers) and need to be split to another * command group. * * A command group is most of the time just the content of a ShadingGroup. * * NOTE: Command index also have 1 high bit to sort by inverted winding state (negatively scaled * object). */ struct Command : public DrawCommand { struct Handle { /* Each batch needs a different drawcall. */ GPUBatch *gpu_batch; /* Splitting the command stream to avoid batching with incompatible commands. */ uint group_id; /* Index to actual draw command. */ /* IMPORTANT: The most significant bit is reserved as a negative scale flag to pack different * winding orders together. */ uint command_id; }; /** * This does not actually contain anything other than a command count. This is because we will * know the actual data position after sorting by just doing a prefix sum using the command_count * of all preceding group. * * We still need a pointer to retain the order of the groups inside a shading group since * consecutive Group inside the Pass::cmd_groups might not be from the same shading group. * * The index of the group is implicit since it is known by the one who want to access it. */ struct Group { /** Reference to the last command issued. Allows pre-emptive batching. */ uint last_command; /** Number of command in this group. */ uint command_count; /** Index of the next command group in a shading group. */ uint next; }; /** * Contains prefix sum of all the Group command_count before the given group at this index. */ struct Prefix { /** Index of the first command of this Group after sorting. */ uint index; /** Number of inverted scaling commands in this Group. */ uint inverted; }; struct Resource { /* TODO */ uint next; }; struct Encoder { Pass &pass; GPUShader *shader; Encoder *sub_shading_group; /** Linked list of Resource. Contained by pass. */ uint command_grp; uint command_grp_first; /** Linked list of Resource. Contained by pass. */ uint shader_resource; Encoder(Pass &pass_) : pass(pass_){}; }; }; class Pass { private: /** * IMPORTANT: Shading groups can be referenced in the engine. * Data container needs not to move already created shading groups. */ /** All data structures in decreasing order of frequency. */ MemBlock shading_groups; /** Shader Resources for command encoders (uniforms, textures, buffers...). */ Vector shader_resources; /** Group of commands which can be batched and will be sorted together. */ Vector cmd_groups; /** Prefix sum of the number of . */ StorageBuffer prefix_buf; /** Command handle sorted by resources. Reference DrawCommand. Uploaded to GPU for sorting. */ StorageBuffer handle_buf; /** Command sorted by state. Uploaded to GPU for sorting and compaction. */ StorageBuffer indirect_buf; public: ShadingGroup &shading_group(GPUShader *) { } }; class Graph { private: public: void init_pass(Pass &pass) { } void execute(Pass &pass) { /* Sorting happens on the GPU. */ pass.drawcall_buf.sort(); /* Compute prefix sum. (Can be multithreaded?) */ prefix_buf.resize(cmd_groups.size()); prefix_buf.memset_zero(); for (Command::Handle &handle : handle_buf) { Command::Prefix &prefix = prefix_buf[handle.group_id]; prefix_buf.index += 1; prefix_buf.inverted += handle.command_id >> 31u; } for (const ShadingGroup &grp : pass.shading_groups) { grp.execute(); } } };